Home | History | Annotate | Download | only in libyuv
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
     12 #define INCLUDE_LIBYUV_ROW_H_
     13 
     14 #include <stdlib.h>  // For malloc.
     15 
     16 #include "libyuv/basic_types.h"
     17 
     18 #ifdef __cplusplus
     19 namespace libyuv {
     20 extern "C" {
     21 #endif
     22 
     23 #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
     24 
     25 #ifdef __cplusplus
     26 #define align_buffer_64(var, size)                                             \
     27   uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63));            \
     28   uint8* var = reinterpret_cast<uint8*>                                        \
     29       ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63)
     30 #else
     31 #define align_buffer_64(var, size)                                             \
     32   uint8* var##_mem = (uint8*)(malloc((size) + 63));               /* NOLINT */ \
     33   uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63)       /* NOLINT */
     34 #endif
     35 
     36 #define free_aligned_buffer_64(var) \
     37   free(var##_mem);  \
     38   var = 0
     39 
     40 #if defined(__pnacl__) || defined(__CLR_VER) || defined(COVERAGE_ENABLED) || \
     41     defined(TARGET_IPHONE_SIMULATOR)
     42 #define LIBYUV_DISABLE_X86
     43 #endif
     44 // True if compiling for SSSE3 as a requirement.
     45 #if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3))
     46 #define LIBYUV_SSSE3_ONLY
     47 #endif
     48 
     49 // Enable for NaCL pepper 33 for bundle and AVX2 support.
     50 //  #define NEW_BINUTILS
     51 
     52 // The following are available on all x86 platforms:
     53 #if !defined(LIBYUV_DISABLE_X86) && \
     54     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
     55 // Effects:
     56 #define HAS_ARGBADDROW_SSE2
     57 #define HAS_ARGBAFFINEROW_SSE2
     58 #define HAS_ARGBATTENUATEROW_SSSE3
     59 #define HAS_ARGBBLENDROW_SSSE3
     60 #define HAS_ARGBCOLORMATRIXROW_SSSE3
     61 #define HAS_ARGBCOLORTABLEROW_X86
     62 #define HAS_ARGBCOPYALPHAROW_SSE2
     63 #define HAS_ARGBCOPYYTOALPHAROW_SSE2
     64 #define HAS_ARGBGRAYROW_SSSE3
     65 #define HAS_ARGBLUMACOLORTABLEROW_SSSE3
     66 #define HAS_ARGBMIRRORROW_SSSE3
     67 #define HAS_ARGBMULTIPLYROW_SSE2
     68 #define HAS_ARGBPOLYNOMIALROW_SSE2
     69 #define HAS_ARGBQUANTIZEROW_SSE2
     70 #define HAS_ARGBSEPIAROW_SSSE3
     71 #define HAS_ARGBSHADEROW_SSE2
     72 #define HAS_ARGBSUBTRACTROW_SSE2
     73 #define HAS_ARGBTOUVROW_SSSE3
     74 #define HAS_ARGBUNATTENUATEROW_SSE2
     75 #define HAS_COMPUTECUMULATIVESUMROW_SSE2
     76 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
     77 #define HAS_INTERPOLATEROW_SSE2
     78 #define HAS_INTERPOLATEROW_SSSE3
     79 #define HAS_RGBCOLORTABLEROW_X86
     80 #define HAS_SOBELROW_SSE2
     81 #define HAS_SOBELTOPLANEROW_SSE2
     82 #define HAS_SOBELXROW_SSE2
     83 #define HAS_SOBELXYROW_SSE2
     84 #define HAS_SOBELYROW_SSE2
     85 
     86 // Conversions:
     87 #define HAS_ABGRTOUVROW_SSSE3
     88 #define HAS_ABGRTOYROW_SSSE3
     89 #define HAS_ARGB1555TOARGBROW_SSE2
     90 #define HAS_ARGB4444TOARGBROW_SSE2
     91 #define HAS_ARGBSHUFFLEROW_SSE2
     92 #define HAS_ARGBSHUFFLEROW_SSSE3
     93 #define HAS_ARGBTOARGB1555ROW_SSE2
     94 #define HAS_ARGBTOARGB4444ROW_SSE2
     95 #define HAS_ARGBTOBAYERGGROW_SSE2
     96 #define HAS_ARGBTOBAYERROW_SSSE3
     97 #define HAS_ARGBTORAWROW_SSSE3
     98 #define HAS_ARGBTORGB24ROW_SSSE3
     99 #define HAS_ARGBTORGB565ROW_SSE2
    100 #define HAS_ARGBTOUV422ROW_SSSE3
    101 #define HAS_ARGBTOUV444ROW_SSSE3
    102 #define HAS_ARGBTOUVJROW_SSSE3
    103 #define HAS_ARGBTOYJROW_SSSE3
    104 #define HAS_ARGBTOYROW_SSSE3
    105 #define HAS_BGRATOUVROW_SSSE3
    106 #define HAS_BGRATOYROW_SSSE3
    107 #define HAS_COPYROW_ERMS
    108 #define HAS_COPYROW_SSE2
    109 #define HAS_COPYROW_X86
    110 #define HAS_HALFROW_SSE2
    111 #define HAS_I400TOARGBROW_SSE2
    112 #define HAS_I411TOARGBROW_SSSE3
    113 #define HAS_I422TOARGB1555ROW_SSSE3
    114 #define HAS_I422TOABGRROW_SSSE3
    115 #define HAS_I422TOARGB1555ROW_SSSE3
    116 #define HAS_I422TOARGB4444ROW_SSSE3
    117 #define HAS_I422TOARGBROW_SSSE3
    118 #define HAS_I422TOBGRAROW_SSSE3
    119 #define HAS_I422TORAWROW_SSSE3
    120 #define HAS_I422TORGB24ROW_SSSE3
    121 #define HAS_I422TORGB565ROW_SSSE3
    122 #define HAS_I422TORGBAROW_SSSE3
    123 #define HAS_I422TOUYVYROW_SSE2
    124 #define HAS_I422TOYUY2ROW_SSE2
    125 #define HAS_I444TOARGBROW_SSSE3
    126 #define HAS_MERGEUVROW_SSE2
    127 #define HAS_MIRRORROW_SSE2
    128 #define HAS_MIRRORROW_SSSE3
    129 #define HAS_MIRRORROW_UV_SSSE3
    130 #define HAS_MIRRORUVROW_SSSE3
    131 #define HAS_NV12TOARGBROW_SSSE3
    132 #define HAS_NV12TORGB565ROW_SSSE3
    133 #define HAS_NV21TOARGBROW_SSSE3
    134 #define HAS_NV21TORGB565ROW_SSSE3
    135 #define HAS_RAWTOARGBROW_SSSE3
    136 #define HAS_RAWTOYROW_SSSE3
    137 #define HAS_RGB24TOARGBROW_SSSE3
    138 #define HAS_RGB24TOYROW_SSSE3
    139 #define HAS_RGB565TOARGBROW_SSE2
    140 #define HAS_RGBATOUVROW_SSSE3
    141 #define HAS_RGBATOYROW_SSSE3
    142 #define HAS_SETROW_X86
    143 #define HAS_SPLITUVROW_SSE2
    144 #define HAS_UYVYTOARGBROW_SSSE3
    145 #define HAS_UYVYTOUV422ROW_SSE2
    146 #define HAS_UYVYTOUVROW_SSE2
    147 #define HAS_UYVYTOYROW_SSE2
    148 #define HAS_YTOARGBROW_SSE2
    149 #define HAS_YUY2TOARGBROW_SSSE3
    150 #define HAS_YUY2TOUV422ROW_SSE2
    151 #define HAS_YUY2TOUVROW_SSE2
    152 #define HAS_YUY2TOYROW_SSE2
    153 #endif
    154 
    155 // GCC >= 4.7.0 required for AVX2.
    156 #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
    157 #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
    158 #define GCC_HAS_AVX2 1
    159 #endif  // GNUC >= 4.7
    160 #endif  // __GNUC__
    161 
    162 // clang >= 3.4.0 required for AVX2.
    163 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
    164 #if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
    165 #define CLANG_HAS_AVX2 1
    166 #endif  // clang >= 3.4
    167 #endif  // __clang__
    168 
    169 // Visual C 2012 required for AVX2.
    170 #if defined(_M_IX86) && defined(_MSC_VER) && _MSC_VER >= 1700
    171 #define VISUALC_HAS_AVX2 1
    172 #endif  // VisualStudio >= 2012
    173 
    174 // The following are available on all x86 platforms, but
    175 // require VS2012, clang 3.4 or gcc 4.7.
    176 // The code supports NaCL but requires a new compiler and validator.
    177 #if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
    178     defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
    179 // Effects:
    180 #define HAS_ARGBPOLYNOMIALROW_AVX2
    181 #define HAS_ARGBSHUFFLEROW_AVX2
    182 #define HAS_ARGBCOPYALPHAROW_AVX2
    183 #define HAS_ARGBCOPYYTOALPHAROW_AVX2
    184 #endif
    185 
    186 // The following require VS2012.
    187 // TODO(fbarchard): Port to gcc.
    188 #if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
    189 #define HAS_ARGBTOUVROW_AVX2
    190 #define HAS_ARGBTOYJROW_AVX2
    191 #define HAS_ARGBTOYROW_AVX2
    192 #define HAS_HALFROW_AVX2
    193 #define HAS_I422TOARGBROW_AVX2
    194 #define HAS_INTERPOLATEROW_AVX2
    195 #define HAS_MERGEUVROW_AVX2
    196 #define HAS_MIRRORROW_AVX2
    197 #define HAS_SPLITUVROW_AVX2
    198 #define HAS_UYVYTOUV422ROW_AVX2
    199 #define HAS_UYVYTOUVROW_AVX2
    200 #define HAS_UYVYTOYROW_AVX2
    201 #define HAS_YUY2TOUV422ROW_AVX2
    202 #define HAS_YUY2TOUVROW_AVX2
    203 #define HAS_YUY2TOYROW_AVX2
    204 
    205 // Effects:
    206 #define HAS_ARGBADDROW_AVX2
    207 #define HAS_ARGBATTENUATEROW_AVX2
    208 #define HAS_ARGBMIRRORROW_AVX2
    209 #define HAS_ARGBMULTIPLYROW_AVX2
    210 #define HAS_ARGBSUBTRACTROW_AVX2
    211 #define HAS_ARGBUNATTENUATEROW_AVX2
    212 #endif  // defined(VISUALC_HAS_AVX2)
    213 
    214 // The following are Yasm x86 only:
    215 // TODO(fbarchard): Port AVX2 to inline.
    216 #if !defined(LIBYUV_DISABLE_X86) && defined(HAVE_YASM)
    217     (defined(_M_IX86) || defined(_M_X64) || \
    218     defined(__x86_64__) || defined(__i386__))
    219 #define HAS_MERGEUVROW_AVX2
    220 #define HAS_MERGEUVROW_MMX
    221 #define HAS_SPLITUVROW_AVX2
    222 #define HAS_SPLITUVROW_MMX
    223 #define HAS_UYVYTOYROW_AVX2
    224 #define HAS_UYVYTOYROW_MMX
    225 #define HAS_YUY2TOYROW_AVX2
    226 #define HAS_YUY2TOYROW_MMX
    227 #endif
    228 
    229 // The following are disabled when SSSE3 is available:
    230 #if !defined(LIBYUV_DISABLE_X86) && \
    231     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) && \
    232     !defined(LIBYUV_SSSE3_ONLY)
    233 #define HAS_ARGBBLENDROW_SSE2
    234 #define HAS_ARGBATTENUATEROW_SSE2
    235 #define HAS_MIRRORROW_SSE2
    236 #endif
    237 
    238 // The following are available on Neon platforms:
    239 #if !defined(LIBYUV_DISABLE_NEON) && \
    240     (defined(__ARM_NEON__) || defined(LIBYUV_NEON)) && \
    241     !defined(__native_client__)
    242 #define HAS_ABGRTOUVROW_NEON
    243 #define HAS_ABGRTOYROW_NEON
    244 #define HAS_ARGB1555TOARGBROW_NEON
    245 #define HAS_ARGB1555TOUVROW_NEON
    246 #define HAS_ARGB1555TOYROW_NEON
    247 #define HAS_ARGB4444TOARGBROW_NEON
    248 #define HAS_ARGB4444TOUVROW_NEON
    249 #define HAS_ARGB4444TOYROW_NEON
    250 #define HAS_ARGBTOARGB1555ROW_NEON
    251 #define HAS_ARGBTOARGB4444ROW_NEON
    252 #define HAS_ARGBTOBAYERROW_NEON
    253 #define HAS_ARGBTOBAYERGGROW_NEON
    254 #define HAS_ARGBTORAWROW_NEON
    255 #define HAS_ARGBTORGB24ROW_NEON
    256 #define HAS_ARGBTORGB565ROW_NEON
    257 #define HAS_ARGBTOUV411ROW_NEON
    258 #define HAS_ARGBTOUV422ROW_NEON
    259 #define HAS_ARGBTOUV444ROW_NEON
    260 #define HAS_ARGBTOUVROW_NEON
    261 #define HAS_ARGBTOUVJROW_NEON
    262 #define HAS_ARGBTOYROW_NEON
    263 #define HAS_ARGBTOYJROW_NEON
    264 #define HAS_BGRATOUVROW_NEON
    265 #define HAS_BGRATOYROW_NEON
    266 #define HAS_COPYROW_NEON
    267 #define HAS_HALFROW_NEON
    268 #define HAS_I400TOARGBROW_NEON
    269 #define HAS_I411TOARGBROW_NEON
    270 #define HAS_I422TOABGRROW_NEON
    271 #define HAS_I422TOARGB1555ROW_NEON
    272 #define HAS_I422TOARGB4444ROW_NEON
    273 #define HAS_I422TOARGBROW_NEON
    274 #define HAS_I422TOBGRAROW_NEON
    275 #define HAS_I422TORAWROW_NEON
    276 #define HAS_I422TORGB24ROW_NEON
    277 #define HAS_I422TORGB565ROW_NEON
    278 #define HAS_I422TORGBAROW_NEON
    279 #define HAS_I422TOUYVYROW_NEON
    280 #define HAS_I422TOYUY2ROW_NEON
    281 #define HAS_I444TOARGBROW_NEON
    282 #define HAS_MERGEUVROW_NEON
    283 #define HAS_MIRRORROW_NEON
    284 #define HAS_MIRRORUVROW_NEON
    285 #define HAS_NV12TOARGBROW_NEON
    286 #define HAS_NV12TORGB565ROW_NEON
    287 #define HAS_NV21TOARGBROW_NEON
    288 #define HAS_NV21TORGB565ROW_NEON
    289 #define HAS_RAWTOARGBROW_NEON
    290 #define HAS_RAWTOUVROW_NEON
    291 #define HAS_RAWTOYROW_NEON
    292 #define HAS_RGB24TOARGBROW_NEON
    293 #define HAS_RGB24TOUVROW_NEON
    294 #define HAS_RGB24TOYROW_NEON
    295 #define HAS_RGB565TOARGBROW_NEON
    296 #define HAS_RGB565TOUVROW_NEON
    297 #define HAS_RGB565TOYROW_NEON
    298 #define HAS_RGBATOUVROW_NEON
    299 #define HAS_RGBATOYROW_NEON
    300 #define HAS_SETROW_NEON
    301 #define HAS_SPLITUVROW_NEON
    302 #define HAS_UYVYTOARGBROW_NEON
    303 #define HAS_UYVYTOUV422ROW_NEON
    304 #define HAS_UYVYTOUVROW_NEON
    305 #define HAS_UYVYTOYROW_NEON
    306 #define HAS_YTOARGBROW_NEON
    307 #define HAS_YUY2TOARGBROW_NEON
    308 #define HAS_YUY2TOUV422ROW_NEON
    309 #define HAS_YUY2TOUVROW_NEON
    310 #define HAS_YUY2TOYROW_NEON
    311 
    312 // Effects:
    313 #define HAS_ARGBADDROW_NEON
    314 #define HAS_ARGBATTENUATEROW_NEON
    315 #define HAS_ARGBBLENDROW_NEON
    316 #define HAS_ARGBGRAYROW_NEON
    317 #define HAS_ARGBMIRRORROW_NEON
    318 #define HAS_ARGBMULTIPLYROW_NEON
    319 #define HAS_ARGBQUANTIZEROW_NEON
    320 #define HAS_ARGBSEPIAROW_NEON
    321 #define HAS_ARGBSHADEROW_NEON
    322 #define HAS_ARGBSUBTRACTROW_NEON
    323 #define HAS_SOBELROW_NEON
    324 #define HAS_SOBELTOPLANEROW_NEON
    325 #define HAS_SOBELXYROW_NEON
    326 #define HAS_SOBELXROW_NEON
    327 #define HAS_SOBELYROW_NEON
    328 #define HAS_INTERPOLATEROW_NEON
    329 // TODO(fbarchard): Investigate neon unittest failure.
    330 // #define HAS_ARGBCOLORMATRIXROW_NEON
    331 #endif
    332 
    333 // The following are available on Mips platforms:
    334 #if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__)
    335 #define HAS_COPYROW_MIPS
    336 #if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
    337 #define HAS_I422TOABGRROW_MIPS_DSPR2
    338 #define HAS_I422TOARGBROW_MIPS_DSPR2
    339 #define HAS_I422TOBGRAROW_MIPS_DSPR2
    340 #define HAS_INTERPOLATEROWS_MIPS_DSPR2
    341 #define HAS_MIRRORROW_MIPS_DSPR2
    342 #define HAS_MIRRORUVROW_MIPS_DSPR2
    343 #define HAS_SPLITUVROW_MIPS_DSPR2
    344 #endif
    345 #endif
    346 
    347 #if defined(_MSC_VER) && !defined(__CLR_VER)
    348 #define SIMD_ALIGNED(var) __declspec(align(16)) var
    349 typedef __declspec(align(16)) int16 vec16[8];
    350 typedef __declspec(align(16)) int32 vec32[4];
    351 typedef __declspec(align(16)) int8 vec8[16];
    352 typedef __declspec(align(16)) uint16 uvec16[8];
    353 typedef __declspec(align(16)) uint32 uvec32[4];
    354 typedef __declspec(align(16)) uint8 uvec8[16];
    355 typedef __declspec(align(32)) int16 lvec16[16];
    356 typedef __declspec(align(32)) int32 lvec32[8];
    357 typedef __declspec(align(32)) int8 lvec8[32];
    358 typedef __declspec(align(32)) uint16 ulvec16[16];
    359 typedef __declspec(align(32)) uint32 ulvec32[8];
    360 typedef __declspec(align(32)) uint8 ulvec8[32];
    361 
    362 #elif defined(__GNUC__)
    363 // Caveat: GCC 4.2 to 4.7 have a known issue using vectors with const.
    364 #define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
    365 typedef int16 __attribute__((vector_size(16))) vec16;
    366 typedef int32 __attribute__((vector_size(16))) vec32;
    367 typedef int8 __attribute__((vector_size(16))) vec8;
    368 typedef uint16 __attribute__((vector_size(16))) uvec16;
    369 typedef uint32 __attribute__((vector_size(16))) uvec32;
    370 typedef uint8 __attribute__((vector_size(16))) uvec8;
    371 #else
    372 #define SIMD_ALIGNED(var) var
    373 typedef int16 vec16[8];
    374 typedef int32 vec32[4];
    375 typedef int8 vec8[16];
    376 typedef uint16 uvec16[8];
    377 typedef uint32 uvec32[4];
    378 typedef uint8 uvec8[16];
    379 #endif
    380 
    381 #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
    382 #define OMITFP
    383 #else
    384 #define OMITFP __attribute__((optimize("omit-frame-pointer")))
    385 #endif
    386 
    387 // NaCL macros for GCC x86 and x64.
    388 
    389 // TODO(nfullagar): When pepper_33 toolchain is distributed, default to
    390 // NEW_BINUTILS and remove all BUNDLEALIGN occurrences.
    391 #if defined(__native_client__)
    392 #define LABELALIGN ".p2align 5\n"
    393 #else
    394 #define LABELALIGN ".p2align 2\n"
    395 #endif
    396 #if defined(__native_client__) && defined(__x86_64__)
    397 #if defined(NEW_BINUTILS)
    398 #define BUNDLELOCK ".bundle_lock\n"
    399 #define BUNDLEUNLOCK ".bundle_unlock\n"
    400 #define BUNDLEALIGN "\n"
    401 #else
    402 #define BUNDLELOCK "\n"
    403 #define BUNDLEUNLOCK "\n"
    404 #define BUNDLEALIGN ".p2align 5\n"
    405 #endif
    406 #define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
    407 #define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
    408 #define MEMLEA(offset, base) #offset "(%q" #base ")"
    409 #define MEMLEA3(offset, index, scale) \
    410     #offset "(,%q" #index "," #scale ")"
    411 #define MEMLEA4(offset, base, index, scale) \
    412     #offset "(%q" #base ",%q" #index "," #scale ")"
    413 #define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
    414 #define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
    415 #define MEMOPREG(opcode, offset, base, index, scale, reg) \
    416     BUNDLELOCK \
    417     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    418     #opcode " (%%r15,%%r14),%%" #reg "\n" \
    419     BUNDLEUNLOCK
    420 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    421     BUNDLELOCK \
    422     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    423     #opcode " %%" #reg ",(%%r15,%%r14)\n" \
    424     BUNDLEUNLOCK
    425 #define MEMOPARG(opcode, offset, base, index, scale, arg) \
    426     BUNDLELOCK \
    427     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    428     #opcode " (%%r15,%%r14),%" #arg "\n" \
    429     BUNDLEUNLOCK
    430 #else
    431 #define BUNDLEALIGN "\n"
    432 #define MEMACCESS(base) "(%" #base ")"
    433 #define MEMACCESS2(offset, base) #offset "(%" #base ")"
    434 #define MEMLEA(offset, base) #offset "(%" #base ")"
    435 #define MEMLEA3(offset, index, scale) \
    436     #offset "(,%" #index "," #scale ")"
    437 #define MEMLEA4(offset, base, index, scale) \
    438     #offset "(%" #base ",%" #index "," #scale ")"
    439 #define MEMMOVESTRING(s, d)
    440 #define MEMSTORESTRING(reg, d)
    441 #define MEMOPREG(opcode, offset, base, index, scale, reg) \
    442     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
    443 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    444     #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
    445 #define MEMOPARG(opcode, offset, base, index, scale, arg) \
    446     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
    447 #endif
    448 
    449 void I444ToARGBRow_NEON(const uint8* src_y,
    450                         const uint8* src_u,
    451                         const uint8* src_v,
    452                         uint8* dst_argb,
    453                         int width);
    454 void I422ToARGBRow_NEON(const uint8* src_y,
    455                         const uint8* src_u,
    456                         const uint8* src_v,
    457                         uint8* dst_argb,
    458                         int width);
    459 void I411ToARGBRow_NEON(const uint8* src_y,
    460                         const uint8* src_u,
    461                         const uint8* src_v,
    462                         uint8* dst_argb,
    463                         int width);
    464 void I422ToBGRARow_NEON(const uint8* src_y,
    465                         const uint8* src_u,
    466                         const uint8* src_v,
    467                         uint8* dst_bgra,
    468                         int width);
    469 void I422ToABGRRow_NEON(const uint8* src_y,
    470                         const uint8* src_u,
    471                         const uint8* src_v,
    472                         uint8* dst_abgr,
    473                         int width);
    474 void I422ToRGBARow_NEON(const uint8* src_y,
    475                         const uint8* src_u,
    476                         const uint8* src_v,
    477                         uint8* dst_rgba,
    478                         int width);
    479 void I422ToRGB24Row_NEON(const uint8* src_y,
    480                          const uint8* src_u,
    481                          const uint8* src_v,
    482                          uint8* dst_rgb24,
    483                          int width);
    484 void I422ToRAWRow_NEON(const uint8* src_y,
    485                        const uint8* src_u,
    486                        const uint8* src_v,
    487                        uint8* dst_raw,
    488                        int width);
    489 void I422ToRGB565Row_NEON(const uint8* src_y,
    490                           const uint8* src_u,
    491                           const uint8* src_v,
    492                           uint8* dst_rgb565,
    493                           int width);
    494 void I422ToARGB1555Row_NEON(const uint8* src_y,
    495                             const uint8* src_u,
    496                             const uint8* src_v,
    497                             uint8* dst_argb1555,
    498                             int width);
    499 void I422ToARGB4444Row_NEON(const uint8* src_y,
    500                             const uint8* src_u,
    501                             const uint8* src_v,
    502                             uint8* dst_argb4444,
    503                             int width);
    504 void NV12ToARGBRow_NEON(const uint8* src_y,
    505                         const uint8* src_uv,
    506                         uint8* dst_argb,
    507                         int width);
    508 void NV21ToARGBRow_NEON(const uint8* src_y,
    509                         const uint8* src_vu,
    510                         uint8* dst_argb,
    511                         int width);
    512 void NV12ToRGB565Row_NEON(const uint8* src_y,
    513                           const uint8* src_uv,
    514                           uint8* dst_rgb565,
    515                           int width);
    516 void NV21ToRGB565Row_NEON(const uint8* src_y,
    517                           const uint8* src_vu,
    518                           uint8* dst_rgb565,
    519                           int width);
    520 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
    521                         uint8* dst_argb,
    522                         int width);
    523 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
    524                         uint8* dst_argb,
    525                         int width);
    526 
    527 void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
    528 void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
    529 void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    530 void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
    531 void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int pix);
    532 void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    533 void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
    534 void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
    535 void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
    536 void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
    537 void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
    538 void ARGBToYRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    539 void ARGBToYJRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    540 void BGRAToYRow_Unaligned_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
    541 void ABGRToYRow_Unaligned_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
    542 void RGBAToYRow_Unaligned_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
    543 void RGB24ToYRow_Unaligned_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
    544 void RAWToYRow_Unaligned_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
    545 void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
    546 void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int pix);
    547 void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    548                          int pix);
    549 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    550                          int pix);
    551 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    552                          int pix);
    553 void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
    554                       uint8* dst_u, uint8* dst_v, int pix);
    555 void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
    556                        uint8* dst_u, uint8* dst_v, int pix);
    557 void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
    558                       uint8* dst_u, uint8* dst_v, int pix);
    559 void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
    560                       uint8* dst_u, uint8* dst_v, int pix);
    561 void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
    562                       uint8* dst_u, uint8* dst_v, int pix);
    563 void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
    564                        uint8* dst_u, uint8* dst_v, int pix);
    565 void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
    566                      uint8* dst_u, uint8* dst_v, int pix);
    567 void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
    568                         uint8* dst_u, uint8* dst_v, int pix);
    569 void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
    570                           uint8* dst_u, uint8* dst_v, int pix);
    571 void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
    572                           uint8* dst_u, uint8* dst_v, int pix);
    573 void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
    574 void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
    575 void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
    576 void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
    577 void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int pix);
    578 void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
    579 void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
    580 void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
    581 void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int pix);
    582 void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int pix);
    583 void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int pix);
    584 void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int pix);
    585 void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int pix);
    586 void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int pix);
    587 void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int pix);
    588 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int pix);
    589 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int pix);
    590 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int pix);
    591 void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    592 void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int pix);
    593 void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int pix);
    594 void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int pix);
    595 void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int pix);
    596 void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int pix);
    597 void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int pix);
    598 void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
    599 void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int pix);
    600 void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int pix);
    601 void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int pix);
    602 void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int pix);
    603 void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int pix);
    604 void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int pix);
    605 void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int pix);
    606 void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y, int pix);
    607 void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y, int pix);
    608 
    609 void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
    610                       uint8* dst_u, uint8* dst_v, int width);
    611 void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
    612                           uint8* dst_u, uint8* dst_v, int width);
    613 void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
    614                        uint8* dst_u, uint8* dst_v, int width);
    615 void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
    616                         uint8* dst_u, uint8* dst_v, int width);
    617 void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
    618                        uint8* dst_u, uint8* dst_v, int width);
    619 void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
    620                        uint8* dst_u, uint8* dst_v, int width);
    621 void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
    622                        uint8* dst_u, uint8* dst_v, int width);
    623 void ARGBToUVRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
    624                                  uint8* dst_u, uint8* dst_v, int width);
    625 void ARGBToUVJRow_Unaligned_SSSE3(const uint8* src_argb, int src_stride_argb,
    626                                   uint8* dst_u, uint8* dst_v, int width);
    627 void BGRAToUVRow_Unaligned_SSSE3(const uint8* src_bgra, int src_stride_bgra,
    628                                  uint8* dst_u, uint8* dst_v, int width);
    629 void ABGRToUVRow_Unaligned_SSSE3(const uint8* src_abgr, int src_stride_abgr,
    630                                  uint8* dst_u, uint8* dst_v, int width);
    631 void RGBAToUVRow_Unaligned_SSSE3(const uint8* src_rgba, int src_stride_rgba,
    632                                  uint8* dst_u, uint8* dst_v, int width);
    633 void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
    634                            uint8* dst_u, uint8* dst_v, int width);
    635 void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
    636                             uint8* dst_u, uint8* dst_v, int width);
    637 void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
    638                            uint8* dst_u, uint8* dst_v, int width);
    639 void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
    640                            uint8* dst_u, uint8* dst_v, int width);
    641 void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
    642                            uint8* dst_u, uint8* dst_v, int width);
    // _Any_NEON variants of the to-UV row functions. The UV444 / UV422 / UV411
    // forms take only the source row; every other form also takes the source
    // stride. The trailing int is named pix in this group rather than width.
    643 void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    644                              int pix);
    645 void ARGBToUV422Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    646                              int pix);
    647 void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    648                              int pix);
    649 void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
    650                           uint8* dst_u, uint8* dst_v, int pix);
    651 void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
    652                            uint8* dst_u, uint8* dst_v, int pix);
    653 void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
    654                           uint8* dst_u, uint8* dst_v, int pix);
    655 void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
    656                           uint8* dst_u, uint8* dst_v, int pix);
    657 void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
    658                           uint8* dst_u, uint8* dst_v, int pix);
    659 void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
    660                            uint8* dst_u, uint8* dst_v, int pix);
    661 void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
    662                          uint8* dst_u, uint8* dst_v, int pix);
    663 void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
    664                             uint8* dst_u, uint8* dst_v, int pix);
    665 void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
    666                               int src_stride_argb1555,
    667                               uint8* dst_u, uint8* dst_v, int pix);
    668 void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
    669                               int src_stride_argb4444,
    670                               uint8* dst_u, uint8* dst_v, int pix);
    // _C variants of the to-UV row functions, one per source format (ARGB,
    // ARGB "J", BGRA, ABGR, RGBA, RGB24, RAW, RGB565, ARGB1555, ARGB4444).
    // Parameters: source row, source stride, dst_u, dst_v, width.
    // NOTE(review): presumably the portable (non-SIMD) versions - confirm.
    671 void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
    672                    uint8* dst_u, uint8* dst_v, int width);
    673 void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
    674                     uint8* dst_u, uint8* dst_v, int width);
    675 void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
    676                    uint8* dst_u, uint8* dst_v, int width);
    677 void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
    678                    uint8* dst_u, uint8* dst_v, int width);
    679 void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
    680                    uint8* dst_u, uint8* dst_v, int width);
    681 void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
    682                     uint8* dst_u, uint8* dst_v, int width);
    683 void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
    684                   uint8* dst_u, uint8* dst_v, int width);
    685 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
    686                      uint8* dst_u, uint8* dst_v, int width);
    687 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
    688                        uint8* dst_u, uint8* dst_v, int width);
    689 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
    690                        uint8* dst_u, uint8* dst_v, int width);
    691 
    // ARGBToUV444Row, SSSE3 family (plain, _Unaligned, _Any). Unlike the
    // ARGBToUVRow functions these take no source stride: one source row in,
    // dst_u and dst_v out.
    692 void ARGBToUV444Row_SSSE3(const uint8* src_argb,
    693                           uint8* dst_u, uint8* dst_v, int width);
    694 void ARGBToUV444Row_Unaligned_SSSE3(const uint8* src_argb,
    695                                     uint8* dst_u, uint8* dst_v, int width);
    696 void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
    697                               uint8* dst_u, uint8* dst_v, int width);
    698 
    // ARGBToUV422Row, SSSE3 family (plain, _Unaligned, _Any). Same parameter
    // list as the UV444 family: source row, dst_u, dst_v, width; no stride.
    699 void ARGBToUV422Row_SSSE3(const uint8* src_argb,
    700                           uint8* dst_u, uint8* dst_v, int width);
    701 void ARGBToUV422Row_Unaligned_SSSE3(const uint8* src_argb,
    702                                     uint8* dst_u, uint8* dst_v, int width);
    703 void ARGBToUV422Row_Any_SSSE3(const uint8* src_argb,
    704                               uint8* dst_u, uint8* dst_v, int width);
    705 
    // _C variants of the single-row ARGB to-UV functions (UV444, UV422, UV411).
    // Parameters: source row, dst_u, dst_v, width; no stride.
    706 void ARGBToUV444Row_C(const uint8* src_argb,
    707                       uint8* dst_u, uint8* dst_v, int width);
    708 void ARGBToUV422Row_C(const uint8* src_argb,
    709                       uint8* dst_u, uint8* dst_v, int width);
    710 void ARGBToUV411Row_C(const uint8* src_argb,
    711                       uint8* dst_u, uint8* dst_v, int width);
    712 
    // MirrorRow: one declaration per backend suffix (AVX2, SSSE3, SSE2, NEON,
    // MIPS_DSPR2, C), all with the same (src, dst, width) parameter list.
    // NOTE(review): presumably reverses a row of bytes - confirm.
    713 void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
    714 void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
    715 void MirrorRow_SSE2(const uint8* src, uint8* dst, int width);
    716 void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
    717 void MirrorRow_MIPS_DSPR2(const uint8* src, uint8* dst, int width);
    718 void MirrorRow_C(const uint8* src, uint8* dst, int width);
    719 
    // MirrorUVRow: takes a single src_uv row and writes two outputs (dst_u,
    // dst_v). Declared for SSSE3, NEON, MIPS_DSPR2 and C.
    // NOTE(review): presumably src_uv is interleaved U/V - confirm.
    720 void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    721                        int width);
    722 void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    723                       int width);
    724 void MirrorUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    725                             int width);
    726 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    727                    int width);
    728 
    // ARGBMirrorRow: (src, dst, width), declared for AVX2, SSSE3, NEON and C.
    // NOTE(review): presumably mirrors whole ARGB pixels rather than single
    // bytes - confirm.
    729 void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
    730 void ARGBMirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
    731 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
    732 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
    733 
    // SplitUVRow: one src_uv input, two outputs (dst_u, dst_v), length pix.
    // Declared as C, SSE2, AVX2, NEON and MIPS_DSPR2, plus _Unaligned variants
    // for SSE2 and MIPS_DSPR2 and _Any variants for all four SIMD backends.
    734 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
    735 void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
    736 void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
    737 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix);
    738 void SplitUVRow_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    739                            int pix);
    740 void SplitUVRow_Unaligned_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    741                                int pix);
    742 void SplitUVRow_Unaligned_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u,
    743                                      uint8* dst_v, int pix);
    744 void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    745                          int pix);
    746 void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    747                          int pix);
    748 void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    749                          int pix);
    750 void SplitUVRow_Any_MIPS_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    751                                int pix);
    752 
    // MergeUVRow: the reverse parameter shape of SplitUVRow - two inputs
    // (src_u, src_v) and a single dst_uv output, length width. Declared as C,
    // SSE2, AVX2 and NEON, plus _Unaligned_SSE2 and _Any variants.
    753 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    754                   int width);
    755 void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    756                      int width);
    757 void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    758                      int width);
    759 void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    760                      int width);
    761 void MergeUVRow_Unaligned_SSE2(const uint8* src_u, const uint8* src_v,
    762                                uint8* dst_uv, int width);
    763 void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    764                          int width);
    765 void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    766                          int width);
    767 void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    768                          int width);
    769 
    // CopyRow: (src, dst, count) over uint8 data, declared for SSE2, ERMS,
    // X86, NEON, MIPS and C.
    // NOTE(review): count is presumably a byte count - confirm.
    770 void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
    771 void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
    772 void CopyRow_X86(const uint8* src, uint8* dst, int count);
    773 void CopyRow_NEON(const uint8* src, uint8* dst, int count);
    774 void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
    775 void CopyRow_C(const uint8* src, uint8* dst, int count);
    776 
    // CopyRow for uint16 elements; only a _C variant is declared here.
    // NOTE(review): count is presumably in uint16 elements, not bytes - confirm.
    777 void CopyRow_16_C(const uint16* src, uint16* dst, int count);
    778 
    // ARGBCopyAlphaRow: ARGB source row to ARGB destination row, declared for
    // C, SSE2 and AVX2.
    // NOTE(review): presumably copies only the alpha channel and leaves the
    // destination colour channels untouched - confirm.
    779 void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
    780 void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
    781 void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
    782 
    // ARGBCopyYToAlphaRow: src_y source row to ARGB destination row, declared
    // for C, SSE2 and AVX2.
    // NOTE(review): presumably writes each Y value into the destination alpha
    // channel - confirm.
    783 void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
    784 void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
    785 void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
    786 
    // Fill functions, declared for X86, NEON and C.
    //   SetRow_*      : (dst, v32, count) - a single row.
    //   ARGBSetRows_* : (dst, v32, width, dst_stride, height) - multiple rows
    //                   separated by dst_stride.
    // v32 is a 32-bit value in both forms.
    // NOTE(review): how SetRow uses the 32-bit value for a uint8 row is not
    // visible from the prototype - confirm.
    787 void SetRow_X86(uint8* dst, uint32 v32, int count);
    788 void ARGBSetRows_X86(uint8* dst, uint32 v32, int width,
    789                      int dst_stride, int height);
    790 void SetRow_NEON(uint8* dst, uint32 v32, int count);
    791 void ARGBSetRows_NEON(uint8* dst, uint32 v32, int width,
    792                       int dst_stride, int height);
    793 void SetRow_C(uint8* dst, uint32 v32, int count);
    794 void ARGBSetRows_C(uint8* dst, uint32 v32, int width, int dst_stride,
    795                    int height);
    796 
    797 // ARGBShufflers for BGRAToARGB etc.
    // All variants share (src_argb, dst_argb, shuffler, pix); shuffler is a
    // caller-supplied const uint8* table.
    // NOTE(review): shuffler presumably holds per-byte source indices (as for
    // a byte-shuffle instruction) - confirm its length and layout.
    798 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
    799                       const uint8* shuffler, int pix);
    800 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
    801                          const uint8* shuffler, int pix);
    802 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
    803                           const uint8* shuffler, int pix);
    804 void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
    805                          const uint8* shuffler, int pix);
    806 void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
    807                          const uint8* shuffler, int pix);
    808 void ARGBShuffleRow_Unaligned_SSSE3(const uint8* src_argb, uint8* dst_argb,
    809                                     const uint8* shuffler, int pix);
    810 void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
    811                              const uint8* shuffler, int pix);
    812 void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
    813                               const uint8* shuffler, int pix);
    814 void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
    815                              const uint8* shuffler, int pix);
    816 void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
    817                              const uint8* shuffler, int pix);
    818 
    // Packed-RGB-format to ARGB row functions, x86 suffixes. RGB24 and RAW are
    // declared with _SSSE3; RGB565, ARGB1555 and ARGB4444 with _SSE2.
    // Parameters: source row, dst_argb, pix.
    819 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
    820 void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
    821 void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int pix);
    822 void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
    823                             int pix);
    824 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
    825                             int pix);
    826 
    // _NEON variants of the packed-RGB-format to ARGB row functions (RGB24,
    // RAW, RGB565, ARGB1555, ARGB4444). Parameters: source row, dst_argb, pix.
    827 void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
    828 void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
    829 void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int pix);
    830 void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
    831                             int pix);
    832 void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
    833                             int pix);
    // _C variants of the packed-RGB-format to ARGB row functions.
    // NOTE(review): the RGB565 / ARGB1555 / ARGB4444 prototypes name their
    // source src_rgb / src_argb, unlike the SIMD variants, which use
    // src_rgb565 / src_argb1555 / src_argb4444. The source format is the one
    // in the function name.
    834 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int pix);
    835 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int pix);
    836 void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int pix);
    837 void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
    838 void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int pix);
    // _Any variants of the x86 packed-RGB-format to ARGB row functions; the
    // SSSE3 / SSE2 split matches the non-_Any declarations (RGB24 and RAW are
    // _Any_SSSE3, the 16-bit formats are _Any_SSE2).
    839 void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int pix);
    840 void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int pix);
    841 void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
    842                               int pix);
    843 void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
    844                                 int pix);
    845 void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
    846                                 int pix);
    // _Any_NEON variants of the packed-RGB-format to ARGB row functions.
    // Parameters: source row, dst_argb, pix.
    847 void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb, int pix);
    848 void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int pix);
    849 void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
    850                               int pix);
    851 void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
    852                                 int pix);
    853 void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
    854                                 int pix);
    855 
    // ARGB to packed-RGB-format row functions, x86 suffixes (RGB24 and RAW as
    // _SSSE3; RGB565, ARGB1555 and ARGB4444 as _SSE2). The destination is
    // named dst_rgb for every format; the output format is the one in the
    // function name.
    856 void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
    857 void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
    858 void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
    859 void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
    860 void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
    861 
    // _NEON variants of the ARGB to packed-RGB-format row functions.
    // Parameters: src_argb, dst_rgb, pix.
    862 void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    863 void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    864 void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    865 void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    866 void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
    867 
    // _C variants of the ARGB to packed-RGB-format row functions. This group
    // additionally declares ARGBToRGBARow_C, which has no SSE or NEON
    // counterpart in the two groups directly above.
    868 void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    869 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    870 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    871 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    872 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    873 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int pix);
    874 
    // I400ToARGBRow: src_y source row to dst_argb, length pix. Declared for
    // SSE2 (plain, _Unaligned, _Any), NEON (plain, _Any) and C.
    875 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
    876 void I400ToARGBRow_Unaligned_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
    877 void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int pix);
    878 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int pix);
    879 void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int pix);
    880 void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int pix);
    881 
    // _C variants of the three-plane YUV to ARGB row functions (I444, I422,
    // I411). Each takes separate src_y / src_u / src_v row pointers, a
    // dst_argb row pointer and a width.
    // NOTE(review): the three formats presumably differ in how many U/V
    // samples are consumed per Y sample - confirm required plane lengths.
    882 void I444ToARGBRow_C(const uint8* src_y,
    883                      const uint8* src_u,
    884                      const uint8* src_v,
    885                      uint8* dst_argb,
    886                      int width);
    887 void I422ToARGBRow_C(const uint8* src_y,
    888                      const uint8* src_u,
    889                      const uint8* src_v,
    890                      uint8* dst_argb,
    891                      int width);
    892 void I411ToARGBRow_C(const uint8* src_y,
    893                      const uint8* src_u,
    894                      const uint8* src_v,
    895                      uint8* dst_argb,
    896                      int width);
    // _C variants for two-plane and packed YUV sources.
    //   NV12* take src_y + src_uv; NV21* take src_y + src_vu.
    //   YUY2 / UYVY take a single packed source row.
    // NOTE(review): the two *ToRGB565Row_C prototypes name their destination
    // dst_argb even though the function name says RGB565 - the output format
    // is presumably RGB565; confirm.
    897 void NV12ToARGBRow_C(const uint8* src_y,
    898                      const uint8* src_uv,
    899                      uint8* dst_argb,
    900                      int width);
    901 void NV21ToRGB565Row_C(const uint8* src_y,
    902                        const uint8* src_vu,
    903                        uint8* dst_argb,
    904                        int width);
    905 void NV12ToRGB565Row_C(const uint8* src_y,
    906                        const uint8* src_uv,
    907                        uint8* dst_argb,
    908                        int width);
    909 void NV21ToARGBRow_C(const uint8* src_y,
    910                      const uint8* src_vu,
    911                      uint8* dst_argb,
    912                      int width);
    913 void YUY2ToARGBRow_C(const uint8* src_yuy2,
    914                      uint8* dst_argb,
    915                      int width);
    916 void UYVYToARGBRow_C(const uint8* src_uyvy,
    917                      uint8* dst_argb,
    918                      int width);
    // _C variants of I422 (src_y / src_u / src_v) to BGRA, ABGR, RGBA, RGB24
    // and RAW. The destination parameter is named after the output format in
    // each prototype.
    919 void I422ToBGRARow_C(const uint8* src_y,
    920                      const uint8* src_u,
    921                      const uint8* src_v,
    922                      uint8* dst_bgra,
    923                      int width);
    924 void I422ToABGRRow_C(const uint8* src_y,
    925                      const uint8* src_u,
    926                      const uint8* src_v,
    927                      uint8* dst_abgr,
    928                      int width);
    929 void I422ToRGBARow_C(const uint8* src_y,
    930                      const uint8* src_u,
    931                      const uint8* src_v,
    932                      uint8* dst_rgba,
    933                      int width);
    934 void I422ToRGB24Row_C(const uint8* src_y,
    935                       const uint8* src_u,
    936                       const uint8* src_v,
    937                       uint8* dst_rgb24,
    938                       int width);
    939 void I422ToRAWRow_C(const uint8* src_y,
    940                     const uint8* src_u,
    941                     const uint8* src_v,
    942                     uint8* dst_raw,
    943                     int width);
    // _C variant of I422 (src_y / src_u / src_v) to ARGB4444; output goes to
    // dst_argb4444, length width.
    944 void I422ToARGB4444Row_C(const uint8* src_y,
    945                          const uint8* src_u,
    946                          const uint8* src_v,
    947                          uint8* dst_argb4444,
    948                          int width);
    // _C variant of I422 (src_y / src_u / src_v) to ARGB1555; output goes to
    // dst_argb1555, length width. The destination is named for the ARGB1555
    // output format, following the sibling prototypes that name the
    // destination after the format they write (dst_argb4444, dst_rgb565).
    949 void I422ToARGB1555Row_C(const uint8* src_y,
    950                          const uint8* src_u,
    951                          const uint8* src_v,
    952                          uint8* dst_argb1555,
    953                          int width);
    // _C variant of I422 (src_y / src_u / src_v) to RGB565; output goes to
    // dst_rgb565, length width.
    954 void I422ToRGB565Row_C(const uint8* src_y,
    955                        const uint8* src_u,
    956                        const uint8* src_v,
    957                        uint8* dst_rgb565,
    958                        int width);
    // _C variant of YToARGBRow: a single src_y row to dst_argb, length width.
    // NOTE(review): how this differs from I400ToARGBRow_C (same parameter
    // shape) is not visible from the prototypes - confirm.
    959 void YToARGBRow_C(const uint8* src_y,
    960                   uint8* dst_argb,
    961                   int width);
    // _AVX2 variant of I422ToARGBRow; same parameter list as I422ToARGBRow_C.
    // It is the only plain _AVX2 YUV-to-RGB row function declared in this
    // part of the header.
    962 void I422ToARGBRow_AVX2(const uint8* src_y,
    963                         const uint8* src_u,
    964                         const uint8* src_v,
    965                         uint8* dst_argb,
    966                         int width);
    // _SSSE3 variants of the three-plane YUV to ARGB row functions (I444,
    // I422, I411); same parameter lists as the _C variants.
    // NOTE(review): as these are the non-_Unaligned forms, pointer alignment
    // requirements may apply - confirm against the implementation.
    967 void I444ToARGBRow_SSSE3(const uint8* src_y,
    968                          const uint8* src_u,
    969                          const uint8* src_v,
    970                          uint8* dst_argb,
    971                          int width);
    972 void I422ToARGBRow_SSSE3(const uint8* src_y,
    973                          const uint8* src_u,
    974                          const uint8* src_v,
    975                          uint8* dst_argb,
    976                          int width);
    977 void I411ToARGBRow_SSSE3(const uint8* src_y,
    978                          const uint8* src_u,
    979                          const uint8* src_v,
    980                          uint8* dst_argb,
    981                          int width);
    // _SSSE3 variants for two-plane (NV12: src_y + src_uv, NV21: src_y +
    // src_vu) and packed (YUY2, UYVY) sources.
    // NOTE(review): the two *ToRGB565Row_SSSE3 prototypes name their
    // destination dst_argb although the function name says RGB565 - confirm
    // the output format.
    982 void NV12ToARGBRow_SSSE3(const uint8* src_y,
    983                          const uint8* src_uv,
    984                          uint8* dst_argb,
    985                          int width);
    986 void NV21ToARGBRow_SSSE3(const uint8* src_y,
    987                          const uint8* src_vu,
    988                          uint8* dst_argb,
    989                          int width);
    990 void NV12ToRGB565Row_SSSE3(const uint8* src_y,
    991                            const uint8* src_uv,
    992                            uint8* dst_argb,
    993                            int width);
    994 void NV21ToRGB565Row_SSSE3(const uint8* src_y,
    995                            const uint8* src_vu,
    996                            uint8* dst_argb,
    997                            int width);
    998 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
    999                          uint8* dst_argb,
   1000                          int width);
   1001 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
   1002                          uint8* dst_argb,
   1003                          int width);
   // _SSSE3 variants of I422 (src_y / src_u / src_v) to BGRA, ABGR and RGBA.
   // The destination parameter is named after the output format.
   1004 void I422ToBGRARow_SSSE3(const uint8* src_y,
   1005                          const uint8* src_u,
   1006                          const uint8* src_v,
   1007                          uint8* dst_bgra,
   1008                          int width);
   1009 void I422ToABGRRow_SSSE3(const uint8* src_y,
   1010                          const uint8* src_u,
   1011                          const uint8* src_v,
   1012                          uint8* dst_abgr,
   1013                          int width);
   1014 void I422ToRGBARow_SSSE3(const uint8* src_y,
   1015                          const uint8* src_u,
   1016                          const uint8* src_v,
   1017                          uint8* dst_rgba,
   1018                          int width);
   // _SSSE3 variants of I422 to the 16-bit formats named in each function
   // (ARGB4444, ARGB1555, RGB565).
   // NOTE(review): all three name their destination dst_argb, unlike the _C
   // prototypes, which name it after the output format - the output is
   // presumably the format in the function name; confirm.
   1019 void I422ToARGB4444Row_SSSE3(const uint8* src_y,
   1020                              const uint8* src_u,
   1021                              const uint8* src_v,
   1022                              uint8* dst_argb,
   1023                              int width);
   1024 void I422ToARGB1555Row_SSSE3(const uint8* src_y,
   1025                              const uint8* src_u,
   1026                              const uint8* src_v,
   1027                              uint8* dst_argb,
   1028                              int width);
   1029 void I422ToRGB565Row_SSSE3(const uint8* src_y,
   1030                            const uint8* src_u,
   1031                            const uint8* src_v,
   1032                            uint8* dst_argb,
   1033                            int width);
   1034 // RGB24/RAW are unaligned.
   // _SSSE3 variants of I422 (src_y / src_u / src_v) to RGB24 and RAW. No
   // separate _Unaligned_SSSE3 declarations for these two formats appear in
   // this part of the header.
   1035 void I422ToRGB24Row_SSSE3(const uint8* src_y,
   1036                           const uint8* src_u,
   1037                           const uint8* src_v,
   1038                           uint8* dst_rgb24,
   1039                           int width);
   1040 void I422ToRAWRow_SSSE3(const uint8* src_y,
   1041                         const uint8* src_u,
   1042                         const uint8* src_v,
   1043                         uint8* dst_raw,
   1044                         int width);
   1045 
   // _Unaligned_SSSE3 variants of the three-plane YUV to ARGB row functions
   // (I444, I422, I411); same parameter lists as the plain _SSSE3 variants.
   // NOTE(review): presumably these drop the pointer-alignment requirement
   // of the plain forms - confirm.
   1046 void I444ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
   1047                                    const uint8* src_u,
   1048                                    const uint8* src_v,
   1049                                    uint8* dst_argb,
   1050                                    int width);
   1051 void I422ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
   1052                                    const uint8* src_u,
   1053                                    const uint8* src_v,
   1054                                    uint8* dst_argb,
   1055                                    int width);
   1056 void I411ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
   1057                                    const uint8* src_u,
   1058                                    const uint8* src_v,
   1059                                    uint8* dst_argb,
   1060                                    int width);
   // _Unaligned_SSSE3 variants for NV12 / NV21 (two planes) and YUY2 / UYVY
   // (one packed row) to ARGB. No _Unaligned RGB565 counterparts are declared
   // in this group.
   1061 void NV12ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
   1062                                    const uint8* src_uv,
   1063                                    uint8* dst_argb,
   1064                                    int width);
   1065 void NV21ToARGBRow_Unaligned_SSSE3(const uint8* src_y,
   1066                                    const uint8* src_vu,
   1067                                    uint8* dst_argb,
   1068                                    int width);
   1069 void YUY2ToARGBRow_Unaligned_SSSE3(const uint8* src_yuy2,
   1070                                    uint8* dst_argb,
   1071                                    int width);
   1072 void UYVYToARGBRow_Unaligned_SSSE3(const uint8* src_uyvy,
   1073                                    uint8* dst_argb,
   1074                                    int width);
   // _Unaligned_SSSE3 variants of I422 (src_y / src_u / src_v) to BGRA, ABGR
   // and RGBA; same parameter lists as the plain _SSSE3 variants.
   1075 void I422ToBGRARow_Unaligned_SSSE3(const uint8* src_y,
   1076                                    const uint8* src_u,
   1077                                    const uint8* src_v,
   1078                                    uint8* dst_bgra,
   1079                                    int width);
   1080 void I422ToABGRRow_Unaligned_SSSE3(const uint8* src_y,
   1081                                    const uint8* src_u,
   1082                                    const uint8* src_v,
   1083                                    uint8* dst_abgr,
   1084                                    int width);
   1085 void I422ToRGBARow_Unaligned_SSSE3(const uint8* src_y,
   1086                                    const uint8* src_u,
   1087                                    const uint8* src_v,
   1088                                    uint8* dst_rgba,
   1089                                    int width);
   // _Any_AVX2 variant of I422ToARGBRow; same parameter list as
   // I422ToARGBRow_AVX2.
   // NOTE(review): "_Any" presumably lifts a width-multiple restriction of
   // the plain AVX2 form - confirm.
   1090 void I422ToARGBRow_Any_AVX2(const uint8* src_y,
   1091                             const uint8* src_u,
   1092                             const uint8* src_v,
   1093                             uint8* dst_argb,
   1094                             int width);
   // _Any_SSSE3 variants of the three-plane YUV to ARGB row functions (I444,
   // I422, I411); same parameter lists as the plain _SSSE3 variants.
   1095 void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
   1096                              const uint8* src_u,
   1097                              const uint8* src_v,
   1098                              uint8* dst_argb,
   1099                              int width);
   1100 void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
   1101                              const uint8* src_u,
   1102                              const uint8* src_v,
   1103                              uint8* dst_argb,
   1104                              int width);
   1105 void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
   1106                              const uint8* src_u,
   1107                              const uint8* src_v,
   1108                              uint8* dst_argb,
   1109                              int width);
   // _Any_SSSE3 variants for NV12 / NV21 (two planes) and YUY2 / UYVY (one
   // packed row).
   // NOTE(review): the two *ToRGB565Row_Any_SSSE3 prototypes name their
   // destination dst_argb although the function name says RGB565 - confirm
   // the output format.
   1110 void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
   1111                              const uint8* src_uv,
   1112                              uint8* dst_argb,
   1113                              int width);
   1114 void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
   1115                              const uint8* src_vu,
   1116                              uint8* dst_argb,
   1117                              int width);
   1118 void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1119                                const uint8* src_uv,
   1120                                uint8* dst_argb,
   1121                                int width);
   1122 void NV21ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1123                                const uint8* src_vu,
   1124                                uint8* dst_argb,
   1125                                int width);
   1126 void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
   1127                              uint8* dst_argb,
   1128                              int width);
   1129 void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
   1130                              uint8* dst_argb,
   1131                              int width);
   // _Any_SSSE3 variants of I422 (src_y / src_u / src_v) to BGRA, ABGR and
   // RGBA. The destination parameter is named after the output format.
   1132 void I422ToBGRARow_Any_SSSE3(const uint8* src_y,
   1133                              const uint8* src_u,
   1134                              const uint8* src_v,
   1135                              uint8* dst_bgra,
   1136                              int width);
   1137 void I422ToABGRRow_Any_SSSE3(const uint8* src_y,
   1138                              const uint8* src_u,
   1139                              const uint8* src_v,
   1140                              uint8* dst_abgr,
   1141                              int width);
   1142 void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
   1143                              const uint8* src_u,
   1144                              const uint8* src_v,
   1145                              uint8* dst_rgba,
   1146                              int width);
   // _Any_SSSE3 variants of I422 to the 16-bit formats named in each function
   // (ARGB4444, ARGB1555, RGB565).
   // NOTE(review): all three name their destination dst_rgba, which matches
   // none of the output formats in the function names - the output is
   // presumably the format in the function name; confirm.
   1147 void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
   1148                                  const uint8* src_u,
   1149                                  const uint8* src_v,
   1150                                  uint8* dst_rgba,
   1151                                  int width);
   1152 void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
   1153                                  const uint8* src_u,
   1154                                  const uint8* src_v,
   1155                                  uint8* dst_rgba,
   1156                                  int width);
   1157 void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1158                                const uint8* src_u,
   1159                                const uint8* src_v,
   1160                                uint8* dst_rgba,
   1161                                int width);
   1162 // RGB24/RAW are unaligned.
   // _Any_SSSE3 variants of I422 (src_y / src_u / src_v) to RGB24 and RAW.
   // NOTE(review): both name their destination dst_argb, whereas the plain
   // _SSSE3 prototypes use dst_rgb24 / dst_raw - the output is presumably the
   // format in the function name; confirm.
   1163 void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
   1164                               const uint8* src_u,
   1165                               const uint8* src_v,
   1166                               uint8* dst_argb,
   1167                               int width);
   1168 void I422ToRAWRow_Any_SSSE3(const uint8* src_y,
   1169                             const uint8* src_u,
   1170                             const uint8* src_v,
   1171                             uint8* dst_argb,
   1172                             int width);
   // SIMD variants of YToARGBRow: SSE2 and NEON, each with an _Any form.
   // Parameters: src_y, dst_argb, width. No _Unaligned form is declared in
   // this group.
   1173 void YToARGBRow_SSE2(const uint8* src_y,
   1174                      uint8* dst_argb,
   1175                      int width);
   1176 void YToARGBRow_NEON(const uint8* src_y,
   1177                      uint8* dst_argb,
   1178                      int width);
   1179 void YToARGBRow_Any_SSE2(const uint8* src_y,
   1180                          uint8* dst_argb,
   1181                          int width);
   1182 void YToARGBRow_Any_NEON(const uint8* src_y,
   1183                          uint8* dst_argb,
   1184                          int width);
   1185 
   1186 // ARGB preattenuated alpha blend.
   // Two ARGB source rows (src_argb, src_argb1) are combined into dst_argb
   // over width. Declared for SSSE3, SSE2, NEON and C.
   // NOTE(review): which source is foreground and which is background is not
   // visible from the prototypes - confirm.
   1187 void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
   1188                         uint8* dst_argb, int width);
   1189 void ARGBBlendRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1190                        uint8* dst_argb, int width);
   1191 void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1192                        uint8* dst_argb, int width);
   1193 void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
   1194                     uint8* dst_argb, int width);
   1195 
   1196 // ARGB multiply images. Same API as Blend, but these require
   1197 // pointer and width alignment for SSE2.
   // Declared for C, SSE2, AVX2 and NEON; each SIMD backend also has an _Any
   // form with the same (src_argb, src_argb1, dst_argb, width) parameters.
   1198 void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
   1199                        uint8* dst_argb, int width);
   1200 void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1201                           uint8* dst_argb, int width);
   1202 void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1203                               uint8* dst_argb, int width);
   1204 void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1205                           uint8* dst_argb, int width);
   1206 void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1207                               uint8* dst_argb, int width);
   1208 void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1209                           uint8* dst_argb, int width);
   1210 void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1211                               uint8* dst_argb, int width);
   1212 
   1213 // ARGB add images. Parameters: two const inputs (src_argb, src_argb1), one output (dst_argb), int width.
   1214 void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
   1215                   uint8* dst_argb, int width);
   1216 void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1217                      uint8* dst_argb, int width);
   1218 void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1219                          uint8* dst_argb, int width);
   1220 void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1221                      uint8* dst_argb, int width);
   1222 void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1223                          uint8* dst_argb, int width);
   1224 void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1225                      uint8* dst_argb, int width);
   1226 void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1227                          uint8* dst_argb, int width);
   1228 
   1229 // ARGB subtract images. Same parameters as Blend (src_argb, src_argb1,
   1230 // dst_argb, width), but these require pointer and width alignment for SSE2.
   1231 void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
   1232                        uint8* dst_argb, int width);
   1233 void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1234                           uint8* dst_argb, int width);
   1235 void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1236                               uint8* dst_argb, int width);
   1237 void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1238                           uint8* dst_argb, int width);
   1239 void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1240                               uint8* dst_argb, int width);
   1241 void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1242                           uint8* dst_argb, int width);
   1243 void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1244                               uint8* dst_argb, int width);
   1245 // ARGBTo{RGB24,RAW,RGB565,ARGB1555,ARGB4444}Row "Any" variants (SSSE3/SSE2): (src_argb, dst_rgb, pix).
   1246 void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
   1247 void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int pix);
   1248 void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
   1249 void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
   1250 void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int pix);
   1251 // ARGBTo{RGB24,RAW,RGB565,ARGB1555,ARGB4444}Row "Any" variants (NEON): (src_argb, dst_rgb, pix).
   1252 void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1253 void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1254 void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1255 void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1256 void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int pix);
   1257 // "Any" NEON rows with three source planes. NOTE(review): the output is named dst_argb for every target format.
   1258 void I444ToARGBRow_Any_NEON(const uint8* src_y,
   1259                             const uint8* src_u,
   1260                             const uint8* src_v,
   1261                             uint8* dst_argb,
   1262                             int width);
   1263 void I422ToARGBRow_Any_NEON(const uint8* src_y,
   1264                             const uint8* src_u,
   1265                             const uint8* src_v,
   1266                             uint8* dst_argb,
   1267                             int width);
   1268 void I411ToARGBRow_Any_NEON(const uint8* src_y,
   1269                             const uint8* src_u,
   1270                             const uint8* src_v,
   1271                             uint8* dst_argb,
   1272                             int width);
   1273 void I422ToBGRARow_Any_NEON(const uint8* src_y,
   1274                             const uint8* src_u,
   1275                             const uint8* src_v,
   1276                             uint8* dst_argb,
   1277                             int width);
   1278 void I422ToABGRRow_Any_NEON(const uint8* src_y,
   1279                             const uint8* src_u,
   1280                             const uint8* src_v,
   1281                             uint8* dst_argb,
   1282                             int width);
   1283 void I422ToRGBARow_Any_NEON(const uint8* src_y,
   1284                             const uint8* src_u,
   1285                             const uint8* src_v,
   1286                             uint8* dst_argb,
   1287                             int width);
   1288 void I422ToRGB24Row_Any_NEON(const uint8* src_y,
   1289                              const uint8* src_u,
   1290                              const uint8* src_v,
   1291                              uint8* dst_argb,
   1292                              int width);
   1293 void I422ToRAWRow_Any_NEON(const uint8* src_y,
   1294                            const uint8* src_u,
   1295                            const uint8* src_v,
   1296                            uint8* dst_argb,
   1297                            int width);
   1298 void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
   1299                                 const uint8* src_u,
   1300                                 const uint8* src_v,
   1301                                 uint8* dst_argb,
   1302                                 int width);
   1303 void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
   1304                                 const uint8* src_u,
   1305                                 const uint8* src_v,
   1306                                 uint8* dst_argb,
   1307                                 int width);
   1308 void I422ToRGB565Row_Any_NEON(const uint8* src_y,
   1309                               const uint8* src_u,
   1310                               const uint8* src_v,
   1311                               uint8* dst_argb,
   1312                               int width);
   1313 void NV12ToARGBRow_Any_NEON(const uint8* src_y,  // NV12/NV21 "Any" NEON rows: two sources (src_y, src_uv).
   1314                             const uint8* src_uv,
   1315                             uint8* dst_argb,
   1316                             int width);
   1317 void NV21ToARGBRow_Any_NEON(const uint8* src_y,
   1318                             const uint8* src_uv,  // NOTE(review): the NV21 rows also name this src_uv.
   1319                             uint8* dst_argb,
   1320                             int width);
   1321 void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
   1322                               const uint8* src_uv,
   1323                               uint8* dst_argb,  // NOTE(review): named dst_argb although the function name says RGB565.
   1324                               int width);
   1325 void NV21ToRGB565Row_Any_NEON(const uint8* src_y,
   1326                               const uint8* src_uv,
   1327                               uint8* dst_argb,
   1328                               int width);
   1329 void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,  // Single-source "Any" NEON rows: (src, dst_argb, width).
   1330                             uint8* dst_argb,
   1331                             int width);
   1332 void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
   1333                             uint8* dst_argb,
   1334                             int width);
   1335 void I422ToARGBRow_MIPS_DSPR2(const uint8* src_y,  // MIPS_DSPR2 rows: three source planes, one destination, int width.
   1336                               const uint8* src_u,
   1337                               const uint8* src_v,
   1338                               uint8* dst_argb,
   1339                               int width);
   1340 void I422ToBGRARow_MIPS_DSPR2(const uint8* src_y,
   1341                               const uint8* src_u,
   1342                               const uint8* src_v,
   1343                               uint8* dst_argb,  // NOTE(review): named dst_argb although the function name says BGRA.
   1344                               int width);
   1345 void I422ToABGRRow_MIPS_DSPR2(const uint8* src_y,
   1346                               const uint8* src_u,
   1347                               const uint8* src_v,
   1348                               uint8* dst_argb,  // NOTE(review): named dst_argb although the function name says ABGR.
   1349                               int width);  // Each MIPS_DSPR2 prototype is declared once; verbatim repeats were dropped.
   1365 // YUY2 rows: ToY outputs dst_y; ToUV (takes stride_yuy2) and ToUV422 (no stride) output dst_u and dst_v.
   1366 void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);
   1367 void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
   1368                       uint8* dst_u, uint8* dst_v, int pix);
   1369 void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
   1370                          uint8* dst_u, uint8* dst_v, int pix);
   1371 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
   1372 void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
   1373                       uint8* dst_u, uint8* dst_v, int pix);
   1374 void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
   1375                          uint8* dst_u, uint8* dst_v, int pix);
   1376 void YUY2ToYRow_Unaligned_SSE2(const uint8* src_yuy2,
   1377                                uint8* dst_y, int pix);
   1378 void YUY2ToUVRow_Unaligned_SSE2(const uint8* src_yuy2, int stride_yuy2,
   1379                                 uint8* dst_u, uint8* dst_v, int pix);
   1380 void YUY2ToUV422Row_Unaligned_SSE2(const uint8* src_yuy2,
   1381                                    uint8* dst_u, uint8* dst_v, int pix);
   1382 void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
   1383 void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
   1384                       uint8* dst_u, uint8* dst_v, int pix);
   1385 void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
   1386                          uint8* dst_u, uint8* dst_v, int pix);
   1387 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int pix);
   1388 void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
   1389                    uint8* dst_u, uint8* dst_v, int pix);
   1390 void YUY2ToUV422Row_C(const uint8* src_yuy2,
   1391                       uint8* dst_u, uint8* dst_v, int pix);
   1392 void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix);  // "Any" YUY2 variants: ToY, ToUV (with stride), ToUV422.
   1393 void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
   1394                           uint8* dst_u, uint8* dst_v, int pix);
   1395 void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
   1396                              uint8* dst_u, uint8* dst_v, int pix);
   1397 void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int pix);
   1398 void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
   1399                           uint8* dst_u, uint8* dst_v, int pix);
   1400 void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
   1401                              uint8* dst_u, uint8* dst_v, int pix);
   1402 void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int pix);
   1403 void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
   1404                           uint8* dst_u, uint8* dst_v, int pix);
   1405 void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
   1406                              uint8* dst_u, uint8* dst_v, int pix);
   1407 void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);  // UYVY rows: ToY, ToUV (with stride_uyvy), ToUV422.
   1408 void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
   1409                       uint8* dst_u, uint8* dst_v, int pix);
   1410 void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
   1411                          uint8* dst_u, uint8* dst_v, int pix);
   1412 void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
   1413 void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
   1414                       uint8* dst_u, uint8* dst_v, int pix);
   1415 void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
   1416                          uint8* dst_u, uint8* dst_v, int pix);
   1417 void UYVYToYRow_Unaligned_SSE2(const uint8* src_uyvy,
   1418                                uint8* dst_y, int pix);
   1419 void UYVYToUVRow_Unaligned_SSE2(const uint8* src_uyvy, int stride_uyvy,
   1420                                 uint8* dst_u, uint8* dst_v, int pix);
   1421 void UYVYToUV422Row_Unaligned_SSE2(const uint8* src_uyvy,
   1422                                    uint8* dst_u, uint8* dst_v, int pix);  // The AVX2 trio is declared once, at the top of this group.
   1428 void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
   1429 void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
   1430                       uint8* dst_u, uint8* dst_v, int pix);
   1431 void UYVYToUV422Row_NEON(const uint8* src_uyvy,
   1432                          uint8* dst_u, uint8* dst_v, int pix);
   1433 // UYVY rows, C and "Any" variants: ToY outputs dst_y; ToUV (with stride_uyvy) and ToUV422 output dst_u, dst_v.
   1434 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int pix);
   1435 void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
   1436                    uint8* dst_u, uint8* dst_v, int pix);
   1437 void UYVYToUV422Row_C(const uint8* src_uyvy,
   1438                       uint8* dst_u, uint8* dst_v, int pix);
   1439 void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int pix);
   1440 void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
   1441                           uint8* dst_u, uint8* dst_v, int pix);
   1442 void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
   1443                              uint8* dst_u, uint8* dst_v, int pix);
   1444 void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int pix);
   1445 void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
   1446                           uint8* dst_u, uint8* dst_v, int pix);
   1447 void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
   1448                              uint8* dst_u, uint8* dst_v, int pix);
   1449 void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int pix);
   1450 void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
   1451                           uint8* dst_u, uint8* dst_v, int pix);
   1452 void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
   1453                              uint8* dst_u, uint8* dst_v, int pix);
   1454 // HalfRow variants on uint8 data: (src_uv, src_uv_stride, dst_uv, pix).
   1455 void HalfRow_C(const uint8* src_uv, int src_uv_stride,
   1456                uint8* dst_uv, int pix);
   1457 void HalfRow_SSE2(const uint8* src_uv, int src_uv_stride,
   1458                   uint8* dst_uv, int pix);
   1459 void HalfRow_AVX2(const uint8* src_uv, int src_uv_stride,
   1460                   uint8* dst_uv, int pix);
   1461 void HalfRow_NEON(const uint8* src_uv, int src_uv_stride,
   1462                   uint8* dst_uv, int pix);
   1463 // uint16 counterpart of HalfRow; only a C variant is declared in this section.
   1464 void HalfRow_16_C(const uint16* src_uv, int src_uv_stride,
   1465                   uint16* dst_uv, int pix);
   1466 // ARGB to Bayer rows. The BayerGG prototypes leave their uint32 (selector) parameter unnamed.
   1467 void ARGBToBayerRow_C(const uint8* src_argb, uint8* dst_bayer,
   1468                       uint32 selector, int pix);
   1469 void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
   1470                           uint32 selector, int pix);
   1471 void ARGBToBayerRow_NEON(const uint8* src_argb, uint8* dst_bayer,
   1472                          uint32 selector, int pix);
   1473 void ARGBToBayerRow_Any_SSSE3(const uint8* src_argb, uint8* dst_bayer,
   1474                               uint32 selector, int pix);
   1475 void ARGBToBayerRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
   1476                              uint32 selector, int pix);
   1477 void ARGBToBayerGGRow_C(const uint8* src_argb, uint8* dst_bayer,
   1478                         uint32 /* selector */, int pix);
   1479 void ARGBToBayerGGRow_SSE2(const uint8* src_argb, uint8* dst_bayer,
   1480                            uint32 /* selector */, int pix);
   1481 void ARGBToBayerGGRow_NEON(const uint8* src_argb, uint8* dst_bayer,
   1482                            uint32 /* selector */, int pix);
   1483 void ARGBToBayerGGRow_Any_SSE2(const uint8* src_argb, uint8* dst_bayer,
   1484                                uint32 /* selector */, int pix);
   1485 void ARGBToBayerGGRow_Any_NEON(const uint8* src_argb, uint8* dst_bayer,
   1486                                uint32 /* selector */, int pix);
   1487 // I422 to YUY2/UYVY rows: three source planes (src_y, src_u, src_v), one destination (dst_yuy2 or dst_uyvy).
   1488 void I422ToYUY2Row_C(const uint8* src_y,
   1489                      const uint8* src_u,
   1490                      const uint8* src_v,
   1491                      uint8* dst_yuy2, int width);
   1492 void I422ToUYVYRow_C(const uint8* src_y,
   1493                      const uint8* src_u,
   1494                      const uint8* src_v,
   1495                      uint8* dst_uyvy, int width);
   1496 void I422ToYUY2Row_SSE2(const uint8* src_y,
   1497                         const uint8* src_u,
   1498                         const uint8* src_v,
   1499                         uint8* dst_yuy2, int width);
   1500 void I422ToUYVYRow_SSE2(const uint8* src_y,
   1501                         const uint8* src_u,
   1502                         const uint8* src_v,
   1503                         uint8* dst_uyvy, int width);
   1504 void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
   1505                             const uint8* src_u,
   1506                             const uint8* src_v,
   1507                             uint8* dst_yuy2, int width);
   1508 void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
   1509                             const uint8* src_u,
   1510                             const uint8* src_v,
   1511                             uint8* dst_uyvy, int width);
   1512 void I422ToYUY2Row_NEON(const uint8* src_y,
   1513                         const uint8* src_u,
   1514                         const uint8* src_v,
   1515                         uint8* dst_yuy2, int width);
   1516 void I422ToUYVYRow_NEON(const uint8* src_y,
   1517                         const uint8* src_u,
   1518                         const uint8* src_v,
   1519                         uint8* dst_uyvy, int width);
   1520 void I422ToYUY2Row_Any_NEON(const uint8* src_y,
   1521                             const uint8* src_u,
   1522                             const uint8* src_v,
   1523                             uint8* dst_yuy2, int width);
   1524 void I422ToUYVYRow_Any_NEON(const uint8* src_y,
   1525                             const uint8* src_u,
   1526                             const uint8* src_v,
   1527                             uint8* dst_uyvy, int width);
   1528 
   1529 // Effects related row functions. ARGBAttenuateRow variants: (src_argb, dst_argb, width).
   1530 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1531 void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
   1532 void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
   1533 void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
   1534 void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
   1535 void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
   1536                                int width);
   1537 void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1538                                 int width);
   1539 void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
   1540                                int width);
   1541 void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
   1542                                int width);
   1543 
   1544 // Inverse table (256 uint32 entries, defined elsewhere) for unattenuate, shared by C and SSE2.
   1545 extern const uint32 fixed_invtbl8[256];
   1546 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1547 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
   1548 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
   1549 void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
   1550                                  int width);
   1551 void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
   1552                                  int width);
   1553 // ARGBGrayRow variants: separate source (src_argb) and destination (dst_argb) pointers.
   1554 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1555 void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
   1556 void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
   1557 // ARGBSepiaRow variants take only dst_argb and width (no separate source pointer).
   1558 void ARGBSepiaRow_C(uint8* dst_argb, int width);
   1559 void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
   1560 void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
   1561 // ARGBColorMatrixRow variants: matrix_argb is passed as const int8*.
   1562 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
   1563                           const int8* matrix_argb, int width);
   1564 void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1565                               const int8* matrix_argb, int width);
   1566 void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
   1567                              const int8* matrix_argb, int width);
   1568 // ARGBColorTableRow variants: dst_argb plus const uint8* table_argb; no separate source pointer.
   1569 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
   1570 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
   1571 // RGBColorTableRow variants: same parameter list as ARGBColorTableRow (dst_argb, table_argb, width).
   1572 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
   1573 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
   1574 // ARGBQuantizeRow variants: dst_argb only, with int scale, interval_size and interval_offset.
   1575 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
   1576                        int interval_offset, int width);
   1577 void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
   1578                           int interval_offset, int width);
   1579 void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
   1580                           int interval_offset, int width);
   1581 // ARGBShadeRow variants: note that width comes before the trailing uint32 value argument.
   1582 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   1583                     uint32 value);
   1584 void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
   1585                        uint32 value);
   1586 void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
   1587                        uint32 value);
   1588 
   1589 // Used for blur. SSE2 variants of the two int32 cumulative-sum helpers.
   1590 void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
   1591                                     int width, int area, uint8* dst, int count);
   1592 void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
   1593                                   const int32* previous_cumsum, int width);
   1594 // C variants of CumulativeSumToAverageRow and ComputeCumulativeSumRow.
   1595 void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
   1596                                  int width, int area, uint8* dst, int count);
   1597 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
   1598                                const int32* previous_cumsum, int width);
   1599 // ARGBAffineRow variants; both are marked LIBYUV_API. uv_dudv is passed as const float*.
   1600 LIBYUV_API
   1601 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
   1602                      uint8* dst_argb, const float* uv_dudv, int width);
   1603 LIBYUV_API
   1604 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
   1605                         uint8* dst_argb, const float* uv_dudv, int width);
   1606 
   1607 // Used for I420Scale, ARGBScale, and ARGBInterpolate. The destination pointer is the first parameter.
   1608 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
   1609                       ptrdiff_t src_stride_ptr,
   1610                       int width, int source_y_fraction);
   1611 void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
   1612                          ptrdiff_t src_stride_ptr, int width,
   1613                          int source_y_fraction);
   1614 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   1615                           ptrdiff_t src_stride_ptr, int width,
   1616                           int source_y_fraction);
   1617 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
   1618                          ptrdiff_t src_stride_ptr, int width,
   1619                          int source_y_fraction);
   1620 void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
   1621                          ptrdiff_t src_stride_ptr, int width,
   1622                          int source_y_fraction);
   1623 void InterpolateRows_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,  // NOTE(review): spelled "Rows" (plural), unlike the other variants.
   1624                                 ptrdiff_t src_stride_ptr, int width,
   1625                                 int source_y_fraction);
   1626 void InterpolateRow_Unaligned_SSE2(uint8* dst_ptr, const uint8* src_ptr,
   1627                                    ptrdiff_t src_stride_ptr, int width,
   1628                                    int source_y_fraction);
   1629 void InterpolateRow_Unaligned_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   1630                                     ptrdiff_t src_stride_ptr, int width,
   1631                                     int source_y_fraction);
   1632 void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
   1633                              ptrdiff_t src_stride_ptr, int width,
   1634                              int source_y_fraction);
   1635 void InterpolateRow_Any_SSE2(uint8* dst_ptr, const uint8* src_ptr,
   1636                              ptrdiff_t src_stride_ptr, int width,
   1637                              int source_y_fraction);
   1638 void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   1639                               ptrdiff_t src_stride_ptr, int width,
   1640                               int source_y_fraction);
   1641 void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
   1642                              ptrdiff_t src_stride_ptr, int width,
   1643                              int source_y_fraction);
   1644 void InterpolateRows_Any_MIPS_DSPR2(uint8* dst_ptr, const uint8* src_ptr,  // NOTE(review): also spelled "Rows" (plural).
   1645                                     ptrdiff_t src_stride_ptr, int width,
   1646                                     int source_y_fraction);
   1647 // uint16 counterpart of InterpolateRow; only a C variant is declared in this section.
   1648 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
   1649                          ptrdiff_t src_stride_ptr,
   1650                          int width, int source_y_fraction);
   1651 
   1652 // Sobel images. SobelX takes three source rows, SobelY two; the rest take src_sobelx and src_sobely.
   1653 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
   1654                  uint8* dst_sobelx, int width);
   1655 void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
   1656                     const uint8* src_y2, uint8* dst_sobelx, int width);
   1657 void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
   1658                     const uint8* src_y2, uint8* dst_sobelx, int width);
   1659 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
   1660                  uint8* dst_sobely, int width);
   1661 void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
   1662                     uint8* dst_sobely, int width);
   1663 void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
   1664                     uint8* dst_sobely, int width);
   1665 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1666                 uint8* dst_argb, int width);
   1667 void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1668                    uint8* dst_argb, int width);
   1669 void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1670                    uint8* dst_argb, int width);
   1671 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1672                        uint8* dst_y, int width);
   1673 void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1674                           uint8* dst_y, int width);
   1675 void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1676                           uint8* dst_y, int width);
   1677 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1678                   uint8* dst_argb, int width);
   1679 void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1680                      uint8* dst_argb, int width);
   1681 void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1682                      uint8* dst_argb, int width);
   1683 // ARGBPolynomialRow variants: poly is passed as const float* (its length is not visible in this header section).
   1684 void ARGBPolynomialRow_C(const uint8* src_argb,
   1685                          uint8* dst_argb, const float* poly,
   1686                          int width);
   1687 void ARGBPolynomialRow_SSE2(const uint8* src_argb,
   1688                             uint8* dst_argb, const float* poly,
   1689                             int width);
   1690 void ARGBPolynomialRow_AVX2(const uint8* src_argb,
   1691                             uint8* dst_argb, const float* poly,
   1692                             int width);
   1693 // ARGBLumaColorTableRow variants: width comes third, followed by const uint8* luma and uint32 lumacoeff.
   1694 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   1695                              const uint8* luma, uint32 lumacoeff);
   1696 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1697                                  int width,
   1698                                  const uint8* luma, uint32 lumacoeff);
   1699 
   1700 #ifdef __cplusplus
   1701 }  // extern "C"
   1702 }  // namespace libyuv
   1703 #endif
   1704 
   1705 #endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
   1706