Home | History | Annotate | Download | only in libyuv
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef INCLUDE_LIBYUV_ROW_H_  // NOLINT
     12 #define INCLUDE_LIBYUV_ROW_H_
     13 
     14 #include <stdlib.h>  // For malloc.
     15 
     16 #include "libyuv/basic_types.h"
     17 
     18 #ifdef __cplusplus
     19 namespace libyuv {
     20 extern "C" {
     21 #endif
     22 
     23 #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
     24 
     25 #ifdef __cplusplus
     26 #define align_buffer_64(var, size)                                             \
     27   uint8* var##_mem = reinterpret_cast<uint8*>(malloc((size) + 63));            \
     28   uint8* var = reinterpret_cast<uint8*>                                        \
     29       ((reinterpret_cast<intptr_t>(var##_mem) + 63) & ~63)
     30 #else
     31 #define align_buffer_64(var, size)                                             \
     32   uint8* var##_mem = (uint8*)(malloc((size) + 63));               /* NOLINT */ \
     33   uint8* var = (uint8*)(((intptr_t)(var##_mem) + 63) & ~63)       /* NOLINT */
     34 #endif
     35 
     36 #define free_aligned_buffer_64(var) \
     37   free(var##_mem);  \
     38   var = 0
     39 
// LIBYUV_DISABLE_X86 gates every x86 HAS_* feature list further down in this
// header. It is forced on for PNaCl, for builds that define __CLR_VER, and for
// 32-bit x86 builds compiled without SSE2.
#if defined(__pnacl__) || defined(__CLR_VER) || \
    (defined(__i386__) && !defined(__SSE2__))
#define LIBYUV_DISABLE_X86
#endif
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
// The outer defined() check keeps compilers without __has_feature from
// choking on the function-like use in the inner #if.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#define LIBYUV_DISABLE_X86
#endif
#endif
// True if compiling for SSSE3 as a requirement.
#if defined(__SSSE3__) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 3))
#define LIBYUV_SSSE3_ONLY
#endif

// LIBYUV_DISABLE_NEON gates the NEON HAS_* feature list further down.
// It is forced on for Native Client builds.
#if defined(__native_client__)
#define LIBYUV_DISABLE_NEON
#endif
// clang >= 3.5.0 required for Arm64.
#if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
#if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
#define LIBYUV_DISABLE_NEON
#endif  // clang >= 3.5
#endif  // __clang__
     64 
     65 // GCC >= 4.7.0 required for AVX2.
     66 #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
     67 #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
     68 #define GCC_HAS_AVX2 1
     69 #endif  // GNUC >= 4.7
     70 #endif  // __GNUC__
     71 
     72 // clang >= 3.4.0 required for AVX2.
     73 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
     74 #if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
     75 #define CLANG_HAS_AVX2 1
     76 #endif  // clang >= 3.4
     77 #endif  // __clang__
     78 
     79 // Visual C 2012 required for AVX2.
     80 #if defined(_M_IX86) && !defined(__clang__) && \
     81     defined(_MSC_VER) && _MSC_VER >= 1700
     82 #define VISUALC_HAS_AVX2 1
     83 #endif  // VisualStudio >= 2012
     84 
     85 // The following are available on all x86 platforms:
     86 #if !defined(LIBYUV_DISABLE_X86) && \
     87     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
     88 // Conversions:
     89 #define HAS_ABGRTOUVROW_SSSE3
     90 #define HAS_ABGRTOYROW_SSSE3
     91 #define HAS_ARGB1555TOARGBROW_SSE2
     92 #define HAS_ARGB4444TOARGBROW_SSE2
     93 #define HAS_ARGBSETROW_X86
     94 #define HAS_ARGBSHUFFLEROW_SSE2
     95 #define HAS_ARGBSHUFFLEROW_SSSE3
     96 #define HAS_ARGBTOARGB1555ROW_SSE2
     97 #define HAS_ARGBTOARGB4444ROW_SSE2
     98 #define HAS_ARGBTORAWROW_SSSE3
     99 #define HAS_ARGBTORGB24ROW_SSSE3
    100 #define HAS_ARGBTORGB565DITHERROW_SSE2
    101 #define HAS_ARGBTORGB565ROW_SSE2
    102 #define HAS_ARGBTOUV444ROW_SSSE3
    103 #define HAS_ARGBTOUVJROW_SSSE3
    104 #define HAS_ARGBTOUVROW_SSSE3
    105 #define HAS_ARGBTOYJROW_SSSE3
    106 #define HAS_ARGBTOYROW_SSSE3
    107 #define HAS_ARGBEXTRACTALPHAROW_SSE2
    108 #define HAS_BGRATOUVROW_SSSE3
    109 #define HAS_BGRATOYROW_SSSE3
    110 #define HAS_COPYROW_ERMS
    111 #define HAS_COPYROW_SSE2
    112 #define HAS_H422TOARGBROW_SSSE3
    113 #define HAS_I400TOARGBROW_SSE2
    114 #define HAS_I422TOARGB1555ROW_SSSE3
    115 #define HAS_I422TOARGB4444ROW_SSSE3
    116 #define HAS_I422TOARGBROW_SSSE3
    117 #define HAS_I422TORGB24ROW_SSSE3
    118 #define HAS_I422TORGB565ROW_SSSE3
    119 #define HAS_I422TORGBAROW_SSSE3
    120 #define HAS_I422TOUYVYROW_SSE2
    121 #define HAS_I422TOYUY2ROW_SSE2
    122 #define HAS_I444TOARGBROW_SSSE3
    123 #define HAS_J400TOARGBROW_SSE2
    124 #define HAS_J422TOARGBROW_SSSE3
    125 #define HAS_MERGEUVROW_SSE2
    126 #define HAS_MIRRORROW_SSSE3
    127 #define HAS_MIRRORUVROW_SSSE3
    128 #define HAS_NV12TOARGBROW_SSSE3
    129 #define HAS_NV12TORGB565ROW_SSSE3
    130 #define HAS_NV21TOARGBROW_SSSE3
    131 #define HAS_RAWTOARGBROW_SSSE3
    132 #define HAS_RAWTORGB24ROW_SSSE3
    133 #define HAS_RAWTOYROW_SSSE3
    134 #define HAS_RGB24TOARGBROW_SSSE3
    135 #define HAS_RGB24TOYROW_SSSE3
    136 #define HAS_RGB565TOARGBROW_SSE2
    137 #define HAS_RGBATOUVROW_SSSE3
    138 #define HAS_RGBATOYROW_SSSE3
    139 #define HAS_SETROW_ERMS
    140 #define HAS_SETROW_X86
    141 #define HAS_SPLITUVROW_SSE2
    142 #define HAS_UYVYTOARGBROW_SSSE3
    143 #define HAS_UYVYTOUV422ROW_SSE2
    144 #define HAS_UYVYTOUVROW_SSE2
    145 #define HAS_UYVYTOYROW_SSE2
    146 #define HAS_YUY2TOARGBROW_SSSE3
    147 #define HAS_YUY2TOUV422ROW_SSE2
    148 #define HAS_YUY2TOUVROW_SSE2
    149 #define HAS_YUY2TOYROW_SSE2
    150 
    151 // Effects:
    152 #define HAS_ARGBADDROW_SSE2
    153 #define HAS_ARGBAFFINEROW_SSE2
    154 #define HAS_ARGBATTENUATEROW_SSSE3
    155 #define HAS_ARGBBLENDROW_SSSE3
    156 #define HAS_ARGBCOLORMATRIXROW_SSSE3
    157 #define HAS_ARGBCOLORTABLEROW_X86
    158 #define HAS_ARGBCOPYALPHAROW_SSE2
    159 #define HAS_ARGBCOPYYTOALPHAROW_SSE2
    160 #define HAS_ARGBGRAYROW_SSSE3
    161 #define HAS_ARGBLUMACOLORTABLEROW_SSSE3
    162 #define HAS_ARGBMIRRORROW_SSE2
    163 #define HAS_ARGBMULTIPLYROW_SSE2
    164 #define HAS_ARGBPOLYNOMIALROW_SSE2
    165 #define HAS_ARGBQUANTIZEROW_SSE2
    166 #define HAS_ARGBSEPIAROW_SSSE3
    167 #define HAS_ARGBSHADEROW_SSE2
    168 #define HAS_ARGBSUBTRACTROW_SSE2
    169 #define HAS_ARGBUNATTENUATEROW_SSE2
    170 #define HAS_BLENDPLANEROW_SSSE3
    171 #define HAS_COMPUTECUMULATIVESUMROW_SSE2
    172 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
    173 #define HAS_INTERPOLATEROW_SSSE3
    174 #define HAS_RGBCOLORTABLEROW_X86
    175 #define HAS_SOBELROW_SSE2
    176 #define HAS_SOBELTOPLANEROW_SSE2
    177 #define HAS_SOBELXROW_SSE2
    178 #define HAS_SOBELXYROW_SSE2
    179 #define HAS_SOBELYROW_SSE2
    180 
    181 // The following functions fail on gcc/clang 32 bit with fpic and framepointer.
    182 // caveat: clangcl uses row_win.cc which works.
    183 #if defined(NDEBUG) || !(defined(_DEBUG) && defined(__i386__)) || \
    184     !defined(__i386__) || defined(_MSC_VER)
    185 // TODO(fbarchard): fix build error on x86 debug
    186 // https://code.google.com/p/libyuv/issues/detail?id=524
    187 #define HAS_I411TOARGBROW_SSSE3
    188 // TODO(fbarchard): fix build error on android_full_debug=1
    189 // https://code.google.com/p/libyuv/issues/detail?id=517
    190 #define HAS_I422ALPHATOARGBROW_SSSE3
    191 #endif
    192 #endif
    193 
// The following are available on all x86 platforms, but
// require VS2012, clang 3.4 or gcc 4.7.
// The code supports NaCL but requires a new compiler and validator.
// Enabled when LIBYUV_DISABLE_X86 is not defined and at least one of the
// *_HAS_AVX2 toolchain flags is defined.
#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
    defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
#define HAS_ARGBCOPYALPHAROW_AVX2
#define HAS_ARGBCOPYYTOALPHAROW_AVX2
#define HAS_ARGBMIRRORROW_AVX2
#define HAS_ARGBPOLYNOMIALROW_AVX2
#define HAS_ARGBSHUFFLEROW_AVX2
#define HAS_ARGBTORGB565DITHERROW_AVX2
#define HAS_ARGBTOUVJROW_AVX2
#define HAS_ARGBTOUVROW_AVX2
#define HAS_ARGBTOYJROW_AVX2
#define HAS_ARGBTOYROW_AVX2
#define HAS_COPYROW_AVX
#define HAS_H422TOARGBROW_AVX2
#define HAS_I400TOARGBROW_AVX2
// Skipped only for 32-bit x86 builds that define _DEBUG.
#if !(defined(_DEBUG) && defined(__i386__))
// TODO(fbarchard): fix build error on android_full_debug=1
// https://code.google.com/p/libyuv/issues/detail?id=517
#define HAS_I422ALPHATOARGBROW_AVX2
#endif
#define HAS_I411TOARGBROW_AVX2
#define HAS_I422TOARGB1555ROW_AVX2
#define HAS_I422TOARGB4444ROW_AVX2
#define HAS_I422TOARGBROW_AVX2
#define HAS_I422TORGB24ROW_AVX2
#define HAS_I422TORGB565ROW_AVX2
#define HAS_I422TORGBAROW_AVX2
#define HAS_I444TOARGBROW_AVX2
#define HAS_INTERPOLATEROW_AVX2
#define HAS_J422TOARGBROW_AVX2
#define HAS_MERGEUVROW_AVX2
#define HAS_MIRRORROW_AVX2
#define HAS_NV12TOARGBROW_AVX2
#define HAS_NV12TORGB565ROW_AVX2
#define HAS_NV21TOARGBROW_AVX2
#define HAS_SPLITUVROW_AVX2
#define HAS_UYVYTOARGBROW_AVX2
#define HAS_UYVYTOUV422ROW_AVX2
#define HAS_UYVYTOUVROW_AVX2
#define HAS_UYVYTOYROW_AVX2
#define HAS_YUY2TOARGBROW_AVX2
#define HAS_YUY2TOUV422ROW_AVX2
#define HAS_YUY2TOUVROW_AVX2
#define HAS_YUY2TOYROW_AVX2

// Effects:
#define HAS_ARGBADDROW_AVX2
#define HAS_ARGBATTENUATEROW_AVX2
#define HAS_ARGBMULTIPLYROW_AVX2
#define HAS_ARGBSUBTRACTROW_AVX2
#define HAS_ARGBUNATTENUATEROW_AVX2
#define HAS_BLENDPLANEROW_AVX2
#endif
    250 
// The following are available for AVX2 Visual C and clangcl 32 bit:
// TODO(fbarchard): Port to gcc.
// _M_IX86 restricts the list to 32-bit targets of Visual C compatible
// compilers; GCC_HAS_AVX2 is deliberately not accepted here.
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
    (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
#define HAS_ARGB1555TOARGBROW_AVX2
#define HAS_ARGB4444TOARGBROW_AVX2
#define HAS_ARGBTOARGB1555ROW_AVX2
#define HAS_ARGBTOARGB4444ROW_AVX2
#define HAS_ARGBTORGB565ROW_AVX2
#define HAS_J400TOARGBROW_AVX2
#define HAS_RGB565TOARGBROW_AVX2
#endif
    263 
// The following are also available on x64 Visual C.
// With clang (clangcl) they are enabled only when __SSSE3__ is defined.
// Both macros may already have been defined by the general x86 list above;
// repeating an identical empty definition is harmless.
#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_X64) && \
    (!defined(__clang__) || defined(__SSSE3__))
#define HAS_I422ALPHATOARGBROW_SSSE3
#define HAS_I422TOARGBROW_SSSE3
#endif
    270 
// The following are available on Neon platforms:
// Enabled when LIBYUV_DISABLE_NEON is not defined and the target is
// __aarch64__, defines __ARM_NEON__, or the build defines LIBYUV_NEON.
#if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
#define HAS_ABGRTOUVROW_NEON
#define HAS_ABGRTOYROW_NEON
#define HAS_ARGB1555TOARGBROW_NEON
#define HAS_ARGB1555TOUVROW_NEON
#define HAS_ARGB1555TOYROW_NEON
#define HAS_ARGB4444TOARGBROW_NEON
#define HAS_ARGB4444TOUVROW_NEON
#define HAS_ARGB4444TOYROW_NEON
#define HAS_ARGBSETROW_NEON
#define HAS_ARGBTOARGB1555ROW_NEON
#define HAS_ARGBTOARGB4444ROW_NEON
#define HAS_ARGBTORAWROW_NEON
#define HAS_ARGBTORGB24ROW_NEON
#define HAS_ARGBTORGB565DITHERROW_NEON
#define HAS_ARGBTORGB565ROW_NEON
#define HAS_ARGBTOUV411ROW_NEON
#define HAS_ARGBTOUV444ROW_NEON
#define HAS_ARGBTOUVJROW_NEON
#define HAS_ARGBTOUVROW_NEON
#define HAS_ARGBTOYJROW_NEON
#define HAS_ARGBTOYROW_NEON
#define HAS_ARGBEXTRACTALPHAROW_NEON
#define HAS_BGRATOUVROW_NEON
#define HAS_BGRATOYROW_NEON
#define HAS_COPYROW_NEON
#define HAS_I400TOARGBROW_NEON
#define HAS_I411TOARGBROW_NEON
#define HAS_I422ALPHATOARGBROW_NEON
#define HAS_I422TOARGB1555ROW_NEON
#define HAS_I422TOARGB4444ROW_NEON
#define HAS_I422TOARGBROW_NEON
#define HAS_I422TORGB24ROW_NEON
#define HAS_I422TORGB565ROW_NEON
#define HAS_I422TORGBAROW_NEON
#define HAS_I422TOUYVYROW_NEON
#define HAS_I422TOYUY2ROW_NEON
#define HAS_I444TOARGBROW_NEON
#define HAS_J400TOARGBROW_NEON
#define HAS_MERGEUVROW_NEON
#define HAS_MIRRORROW_NEON
#define HAS_MIRRORUVROW_NEON
#define HAS_NV12TOARGBROW_NEON
#define HAS_NV12TORGB565ROW_NEON
#define HAS_NV21TOARGBROW_NEON
#define HAS_RAWTOARGBROW_NEON
#define HAS_RAWTORGB24ROW_NEON
#define HAS_RAWTOUVROW_NEON
#define HAS_RAWTOYROW_NEON
#define HAS_RGB24TOARGBROW_NEON
#define HAS_RGB24TOUVROW_NEON
#define HAS_RGB24TOYROW_NEON
#define HAS_RGB565TOARGBROW_NEON
#define HAS_RGB565TOUVROW_NEON
#define HAS_RGB565TOYROW_NEON
#define HAS_RGBATOUVROW_NEON
#define HAS_RGBATOYROW_NEON
#define HAS_SETROW_NEON
#define HAS_SPLITUVROW_NEON
#define HAS_UYVYTOARGBROW_NEON
#define HAS_UYVYTOUV422ROW_NEON
#define HAS_UYVYTOUVROW_NEON
#define HAS_UYVYTOYROW_NEON
#define HAS_YUY2TOARGBROW_NEON
#define HAS_YUY2TOUV422ROW_NEON
#define HAS_YUY2TOUVROW_NEON
#define HAS_YUY2TOYROW_NEON

// Effects:
#define HAS_ARGBADDROW_NEON
#define HAS_ARGBATTENUATEROW_NEON
#define HAS_ARGBBLENDROW_NEON
#define HAS_ARGBCOLORMATRIXROW_NEON
#define HAS_ARGBGRAYROW_NEON
#define HAS_ARGBMIRRORROW_NEON
#define HAS_ARGBMULTIPLYROW_NEON
#define HAS_ARGBQUANTIZEROW_NEON
#define HAS_ARGBSEPIAROW_NEON
#define HAS_ARGBSHADEROW_NEON
#define HAS_ARGBSHUFFLEROW_NEON
#define HAS_ARGBSUBTRACTROW_NEON
#define HAS_INTERPOLATEROW_NEON
#define HAS_SOBELROW_NEON
#define HAS_SOBELTOPLANEROW_NEON
#define HAS_SOBELXROW_NEON
#define HAS_SOBELXYROW_NEON
#define HAS_SOBELYROW_NEON
#endif
    361 
// The following are available on Mips platforms:
// Restricted to the ABI32 calling convention (_MIPS_SIM) and ISA revisions
// below 6; LIBYUV_DISABLE_MIPS turns the whole list off.
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
    (_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
#define HAS_COPYROW_MIPS
// The DSPR2 variants additionally need DSP revision 2 or later.
#if defined(__mips_dsp) && (__mips_dsp_rev >= 2)
#define HAS_I422TOARGBROW_DSPR2
#define HAS_INTERPOLATEROW_DSPR2
#define HAS_MIRRORROW_DSPR2
#define HAS_MIRRORUVROW_DSPR2
#define HAS_SPLITUVROW_DSPR2
#endif
#endif
    374 
// SIMD_ALIGNED(var) decorates a variable declaration with the alignment the
// SIMD code expects, and the vec*/lvec* typedefs name 16-byte and 32-byte
// groups of lanes (the "u" prefix marks the unsigned element types).
// Three flavours are provided, chosen by compiler:
//   1. Visual C (not /clr, not clang): __declspec(align) on plain arrays.
//   2. gcc / clang (not PNaCl): aligned attribute and vector_size types.
//   3. Anything else: no alignment request, plain arrays.
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
// 32-byte alignment when the AVX2 code paths are compiled in, else 16.
#if defined(VISUALC_HAS_AVX2)
#define SIMD_ALIGNED(var) __declspec(align(32)) var
#else
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#endif
typedef __declspec(align(16)) int16 vec16[8];
typedef __declspec(align(16)) int32 vec32[4];
typedef __declspec(align(16)) int8 vec8[16];
typedef __declspec(align(16)) uint16 uvec16[8];
typedef __declspec(align(16)) uint32 uvec32[4];
typedef __declspec(align(16)) uint8 uvec8[16];
typedef __declspec(align(32)) int16 lvec16[16];
typedef __declspec(align(32)) int32 lvec32[8];
typedef __declspec(align(32)) int8 lvec8[32];
typedef __declspec(align(32)) uint16 ulvec16[16];
typedef __declspec(align(32)) uint32 ulvec32[8];
typedef __declspec(align(32)) uint8 ulvec8[32];
#elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__))
// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
// 32-byte alignment when the AVX2 code paths are compiled in, else 16.
#if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)
#define SIMD_ALIGNED(var) var __attribute__((aligned(32)))
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#endif
typedef int16 __attribute__((vector_size(16))) vec16;
typedef int32 __attribute__((vector_size(16))) vec32;
typedef int8 __attribute__((vector_size(16))) vec8;
typedef uint16 __attribute__((vector_size(16))) uvec16;
typedef uint32 __attribute__((vector_size(16))) uvec32;
typedef uint8 __attribute__((vector_size(16))) uvec8;
typedef int16 __attribute__((vector_size(32))) lvec16;
typedef int32 __attribute__((vector_size(32))) lvec32;
typedef int8 __attribute__((vector_size(32))) lvec8;
typedef uint16 __attribute__((vector_size(32))) ulvec16;
typedef uint32 __attribute__((vector_size(32))) ulvec32;
typedef uint8 __attribute__((vector_size(32))) ulvec8;
#else
// Fallback: SIMD_ALIGNED adds nothing and the types are ordinary arrays.
#define SIMD_ALIGNED(var) var
typedef int16 vec16[8];
typedef int32 vec32[4];
typedef int8 vec8[16];
typedef uint16 uvec16[8];
typedef uint32 uvec32[4];
typedef uint8 uvec8[16];
typedef int16 lvec16[16];
typedef int32 lvec32[8];
typedef int8 lvec8[32];
typedef uint16 ulvec16[16];
typedef uint32 ulvec32[8];
typedef uint8 ulvec8[32];
#endif
    427 
// struct YuvConstants holds the coefficients of one YUV <-> RGB conversion
// matrix. Its layout differs per architecture, so code that reads the fields
// must be compiled for the same target as the tables declared below.
#if defined(__aarch64__)
// This struct is for Arm64 color conversion.
struct YuvConstants {
  uvec16 kUVToRB;
  uvec16 kUVToRB2;
  uvec16 kUVToG;
  uvec16 kUVToG2;
  vec16 kUVBiasBGR;
  vec32 kYToRgb;
};
#elif defined(__arm__)
// This struct is for ArmV7 color conversion.
struct YuvConstants {
  uvec8 kUVToRB;
  uvec8 kUVToG;
  vec16 kUVBiasBGR;
  vec32 kYToRgb;
};
#else
// This struct is for Intel color conversion.
// Every member is 32 bytes wide (32 x int8 or 16 x int16), assuming int8 and
// int16 are 1 and 2 bytes.
struct YuvConstants {
  int8 kUVToB[32];
  int8 kUVToG[32];
  int8 kUVToR[32];
  int16 kUVBiasB[16];
  int16 kUVBiasG[16];
  int16 kUVBiasR[16];
  int16 kYToRgb[16];
};

// Offsets into YuvConstants structure
// Byte offsets of the members above, in declaration order (32 bytes each).
// They must be kept in sync with the struct by hand.
// NOTE(review): presumably consumed by inline assembly that cannot name the
// fields - confirm against the row_*.cc users.
#define KUVTOB   0
#define KUVTOG   32
#define KUVTOR   64
#define KUVBIASB 96
#define KUVBIASG 128
#define KUVBIASR 160
#define KYTORGB  192
#endif

// The tables below are only declared here (extern); SIMD_ALIGNED attaches the
// platform alignment request to each declaration.

// Conversion matrix for YUV to RGB
extern const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants);  // BT.601
extern const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants);  // JPeg
extern const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants);  // BT.709

// Conversion matrix for YVU to BGR
extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants);  // BT.601
extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants);  // JPeg
extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants);  // BT.709
    477 
// OMITFP is a function attribute asking the compiler to build that function
// with "omit-frame-pointer". It expands to nothing on Apple targets, x86-64
// and LLVM-based compilers.
// NOTE(review): the attribute branch uses GCC syntax; it is presumably only
// expanded in GCC-compiled sources - confirm before using OMITFP elsewhere.
#if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
#define OMITFP
#else
#define OMITFP __attribute__((optimize("omit-frame-pointer")))
#endif
    483 
    484 // NaCL macros for GCC x86 and x64.
    485 #if defined(__native_client__)
    486 #define LABELALIGN ".p2align 5\n"
    487 #else
    488 #define LABELALIGN
    489 #endif
    490 #if defined(__native_client__) && defined(__x86_64__)
    491 // r14 is used for MEMOP macros.
    492 #define NACL_R14 "r14",
    493 #define BUNDLELOCK ".bundle_lock\n"
    494 #define BUNDLEUNLOCK ".bundle_unlock\n"
    495 #define MEMACCESS(base) "%%nacl:(%%r15,%q" #base ")"
    496 #define MEMACCESS2(offset, base) "%%nacl:" #offset "(%%r15,%q" #base ")"
    497 #define MEMLEA(offset, base) #offset "(%q" #base ")"
    498 #define MEMLEA3(offset, index, scale) \
    499     #offset "(,%q" #index "," #scale ")"
    500 #define MEMLEA4(offset, base, index, scale) \
    501     #offset "(%q" #base ",%q" #index "," #scale ")"
    502 #define MEMMOVESTRING(s, d) "%%nacl:(%q" #s "),%%nacl:(%q" #d "), %%r15"
    503 #define MEMSTORESTRING(reg, d) "%%" #reg ",%%nacl:(%q" #d "), %%r15"
    504 #define MEMOPREG(opcode, offset, base, index, scale, reg) \
    505     BUNDLELOCK \
    506     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    507     #opcode " (%%r15,%%r14),%%" #reg "\n" \
    508     BUNDLEUNLOCK
    509 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    510     BUNDLELOCK \
    511     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    512     #opcode " %%" #reg ",(%%r15,%%r14)\n" \
    513     BUNDLEUNLOCK
    514 #define MEMOPARG(opcode, offset, base, index, scale, arg) \
    515     BUNDLELOCK \
    516     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    517     #opcode " (%%r15,%%r14),%" #arg "\n" \
    518     BUNDLEUNLOCK
    519 #define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
    520     BUNDLELOCK \
    521     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    522     #opcode " (%%r15,%%r14),%%" #reg1 ",%%" #reg2 "\n" \
    523     BUNDLEUNLOCK
    524 #define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
    525     BUNDLELOCK \
    526     "lea " #offset "(%q" #base ",%q" #index "," #scale "),%%r14d\n" \
    527     #op " $" #sel ",%%" #reg ",(%%r15,%%r14)\n" \
    528     BUNDLEUNLOCK
    529 #else  // defined(__native_client__) && defined(__x86_64__)
    530 #define NACL_R14
    531 #define BUNDLEALIGN
    532 #define MEMACCESS(base) "(%" #base ")"
    533 #define MEMACCESS2(offset, base) #offset "(%" #base ")"
    534 #define MEMLEA(offset, base) #offset "(%" #base ")"
    535 #define MEMLEA3(offset, index, scale) \
    536     #offset "(,%" #index "," #scale ")"
    537 #define MEMLEA4(offset, base, index, scale) \
    538     #offset "(%" #base ",%" #index "," #scale ")"
    539 #define MEMMOVESTRING(s, d)
    540 #define MEMSTORESTRING(reg, d)
    541 #define MEMOPREG(opcode, offset, base, index, scale, reg) \
    542     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg "\n"
    543 #define MEMOPMEM(opcode, reg, offset, base, index, scale) \
    544     #opcode " %%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
    545 #define MEMOPARG(opcode, offset, base, index, scale, arg) \
    546     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%" #arg "\n"
    547 #define VMEMOPREG(opcode, offset, base, index, scale, reg1, reg2) \
    548     #opcode " " #offset "(%" #base ",%" #index "," #scale "),%%" #reg1 ",%%" \
    549     #reg2 "\n"
    550 #define VEXTOPMEM(op, sel, reg, offset, base, index, scale) \
    551     #op " $" #sel ",%%" #reg ","#offset "(%" #base ",%" #index "," #scale ")\n"
    552 #endif  // defined(__native_client__) && defined(__x86_64__)
    553 
// On ARM targets the x86 MEMACCESS definition above is discarded. Under
// Native Client it becomes an aligned "bic" instruction that clears the top
// two bits (mask 0xc0000000) of the base register; otherwise it expands to
// nothing.
#if defined(__arm__) || defined(__aarch64__)
#undef MEMACCESS
#if defined(__native_client__)
#define MEMACCESS(base) ".p2align 3\nbic %" #base ", #0xc0000000\n"
#else
#define MEMACCESS(base)
#endif
#endif
    562 
    563 void I444ToARGBRow_NEON(const uint8* src_y,
    564                         const uint8* src_u,
    565                         const uint8* src_v,
    566                         uint8* dst_argb,
    567                         const struct YuvConstants* yuvconstants,
    568                         int width);
    569 void I422ToARGBRow_NEON(const uint8* src_y,
    570                         const uint8* src_u,
    571                         const uint8* src_v,
    572                         uint8* dst_argb,
    573                         const struct YuvConstants* yuvconstants,
    574                         int width);
    575 void I422AlphaToARGBRow_NEON(const uint8* y_buf,
    576                              const uint8* u_buf,
    577                              const uint8* v_buf,
    578                              const uint8* a_buf,
    579                              uint8* dst_argb,
    580                              const struct YuvConstants* yuvconstants,
    581                              int width);
    582 void I422ToARGBRow_NEON(const uint8* src_y,
    583                         const uint8* src_u,
    584                         const uint8* src_v,
    585                         uint8* dst_argb,
    586                         const struct YuvConstants* yuvconstants,
    587                         int width);
    588 void I411ToARGBRow_NEON(const uint8* src_y,
    589                         const uint8* src_u,
    590                         const uint8* src_v,
    591                         uint8* dst_argb,
    592                         const struct YuvConstants* yuvconstants,
    593                         int width);
    594 void I422ToRGBARow_NEON(const uint8* src_y,
    595                         const uint8* src_u,
    596                         const uint8* src_v,
    597                         uint8* dst_rgba,
    598                         const struct YuvConstants* yuvconstants,
    599                         int width);
    600 void I422ToRGB24Row_NEON(const uint8* src_y,
    601                          const uint8* src_u,
    602                          const uint8* src_v,
    603                          uint8* dst_rgb24,
    604                          const struct YuvConstants* yuvconstants,
    605                          int width);
    606 void I422ToRGB565Row_NEON(const uint8* src_y,
    607                           const uint8* src_u,
    608                           const uint8* src_v,
    609                           uint8* dst_rgb565,
    610                           const struct YuvConstants* yuvconstants,
    611                           int width);
    612 void I422ToARGB1555Row_NEON(const uint8* src_y,
    613                             const uint8* src_u,
    614                             const uint8* src_v,
    615                             uint8* dst_argb1555,
    616                             const struct YuvConstants* yuvconstants,
    617                             int width);
    618 void I422ToARGB4444Row_NEON(const uint8* src_y,
    619                             const uint8* src_u,
    620                             const uint8* src_v,
    621                             uint8* dst_argb4444,
    622                             const struct YuvConstants* yuvconstants,
    623                             int width);
    624 void NV12ToARGBRow_NEON(const uint8* src_y,
    625                         const uint8* src_uv,
    626                         uint8* dst_argb,
    627                         const struct YuvConstants* yuvconstants,
    628                         int width);
    629 void NV12ToRGB565Row_NEON(const uint8* src_y,
    630                           const uint8* src_uv,
    631                           uint8* dst_rgb565,
    632                           const struct YuvConstants* yuvconstants,
    633                           int width);
    634 void NV21ToARGBRow_NEON(const uint8* src_y,
    635                         const uint8* src_vu,
    636                         uint8* dst_argb,
    637                         const struct YuvConstants* yuvconstants,
    638                         int width);
    639 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
    640                         uint8* dst_argb,
    641                         const struct YuvConstants* yuvconstants,
    642                         int width);
    643 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
    644                         uint8* dst_argb,
    645                         const struct YuvConstants* yuvconstants,
    646                         int width);
    647 
    648 void ARGBToYRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
    649 void ARGBToYRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
    650 void ARGBToYRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
    651 void ARGBToYJRow_AVX2(const uint8* src_argb, uint8* dst_y, int width);
    652 void ARGBToYJRow_Any_AVX2(const uint8* src_argb, uint8* dst_y, int width);
    653 void ARGBToYJRow_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
    654 void BGRAToYRow_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
    655 void ABGRToYRow_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
    656 void RGBAToYRow_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
    657 void RGB24ToYRow_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
    658 void RAWToYRow_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
    659 void ARGBToYRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
    660 void ARGBToYJRow_NEON(const uint8* src_argb, uint8* dst_y, int width);
    661 void ARGBToUV444Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    662                          int width);
    663 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    664                          int width);
    665 void ARGBToUVRow_NEON(const uint8* src_argb, int src_stride_argb,
    666                       uint8* dst_u, uint8* dst_v, int width);
    667 void ARGBToUVJRow_NEON(const uint8* src_argb, int src_stride_argb,
    668                        uint8* dst_u, uint8* dst_v, int width);
    669 void BGRAToUVRow_NEON(const uint8* src_bgra, int src_stride_bgra,
    670                       uint8* dst_u, uint8* dst_v, int width);
    671 void ABGRToUVRow_NEON(const uint8* src_abgr, int src_stride_abgr,
    672                       uint8* dst_u, uint8* dst_v, int width);
    673 void RGBAToUVRow_NEON(const uint8* src_rgba, int src_stride_rgba,
    674                       uint8* dst_u, uint8* dst_v, int width);
    675 void RGB24ToUVRow_NEON(const uint8* src_rgb24, int src_stride_rgb24,
    676                        uint8* dst_u, uint8* dst_v, int width);
    677 void RAWToUVRow_NEON(const uint8* src_raw, int src_stride_raw,
    678                      uint8* dst_u, uint8* dst_v, int width);
    679 void RGB565ToUVRow_NEON(const uint8* src_rgb565, int src_stride_rgb565,
    680                         uint8* dst_u, uint8* dst_v, int width);
    681 void ARGB1555ToUVRow_NEON(const uint8* src_argb1555, int src_stride_argb1555,
    682                           uint8* dst_u, uint8* dst_v, int width);
    683 void ARGB4444ToUVRow_NEON(const uint8* src_argb4444, int src_stride_argb4444,
    684                           uint8* dst_u, uint8* dst_v, int width);
    685 void BGRAToYRow_NEON(const uint8* src_bgra, uint8* dst_y, int width);
    686 void ABGRToYRow_NEON(const uint8* src_abgr, uint8* dst_y, int width);
    687 void RGBAToYRow_NEON(const uint8* src_rgba, uint8* dst_y, int width);
    688 void RGB24ToYRow_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
    689 void RAWToYRow_NEON(const uint8* src_raw, uint8* dst_y, int width);
    690 void RGB565ToYRow_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
    691 void ARGB1555ToYRow_NEON(const uint8* src_argb1555, uint8* dst_y, int width);
    692 void ARGB4444ToYRow_NEON(const uint8* src_argb4444, uint8* dst_y, int width);
    // Y row prototypes with the _C suffix: (const source pointer, non-const
    // dst_y pointer, int width).
    // NOTE(review): _C presumably denotes the portable C implementations -
    // confirm against the definitions.
    693 void ARGBToYRow_C(const uint8* src_argb, uint8* dst_y, int width);
    694 void ARGBToYJRow_C(const uint8* src_argb, uint8* dst_y, int width);
    695 void BGRAToYRow_C(const uint8* src_bgra, uint8* dst_y, int width);
    696 void ABGRToYRow_C(const uint8* src_abgr, uint8* dst_y, int width);
    697 void RGBAToYRow_C(const uint8* src_rgba, uint8* dst_y, int width);
    698 void RGB24ToYRow_C(const uint8* src_rgb24, uint8* dst_y, int width);
    699 void RAWToYRow_C(const uint8* src_raw, uint8* dst_y, int width);
    700 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width);
    701 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width);
    702 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width);
    // Y row prototypes with the _Any_SSSE3 suffix: (src, dst_y, width).
    // NOTE(review): the _Any_ infix presumably marks wrappers that accept
    // arbitrary widths; these declarations alone do not show that - confirm.
    703 void ARGBToYRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
    704 void ARGBToYJRow_Any_SSSE3(const uint8* src_argb, uint8* dst_y, int width);
    705 void BGRAToYRow_Any_SSSE3(const uint8* src_bgra, uint8* dst_y, int width);
    706 void ABGRToYRow_Any_SSSE3(const uint8* src_abgr, uint8* dst_y, int width);
    707 void RGBAToYRow_Any_SSSE3(const uint8* src_rgba, uint8* dst_y, int width);
    708 void RGB24ToYRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_y, int width);
    709 void RAWToYRow_Any_SSSE3(const uint8* src_raw, uint8* dst_y, int width);
    // Y row prototypes with the _Any_NEON suffix: (src, dst_y, width).
    // NOTE(review): presumably the any-width counterparts of the *_NEON Y row
    // functions - confirm against the definitions.
    710 void ARGBToYRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
    711 void ARGBToYJRow_Any_NEON(const uint8* src_argb, uint8* dst_y, int width);
    712 void BGRAToYRow_Any_NEON(const uint8* src_bgra, uint8* dst_y, int width);
    713 void ABGRToYRow_Any_NEON(const uint8* src_abgr, uint8* dst_y, int width);
    714 void RGBAToYRow_Any_NEON(const uint8* src_rgba, uint8* dst_y, int width);
    715 void RGB24ToYRow_Any_NEON(const uint8* src_rgb24, uint8* dst_y, int width);
    716 void RAWToYRow_Any_NEON(const uint8* src_raw, uint8* dst_y, int width);
    717 void RGB565ToYRow_Any_NEON(const uint8* src_rgb565, uint8* dst_y, int width);
    718 void ARGB1555ToYRow_Any_NEON(const uint8* src_argb1555, uint8* dst_y,
    719                              int width);
    720 void ARGB4444ToYRow_Any_NEON(const uint8* src_argb4444, uint8* dst_y,
    721                              int width);
    722 
    // UV row prototypes with _AVX2 / _SSSE3 suffixes. Parameter list is
    // (const source pointer, int source stride, dst_u, dst_v, int width).
    723 void ARGBToUVRow_AVX2(const uint8* src_argb, int src_stride_argb,
    724                       uint8* dst_u, uint8* dst_v, int width);
    725 void ARGBToUVJRow_AVX2(const uint8* src_argb, int src_stride_argb,
    726                        uint8* dst_u, uint8* dst_v, int width);
    727 void ARGBToUVRow_SSSE3(const uint8* src_argb, int src_stride_argb,
    728                        uint8* dst_u, uint8* dst_v, int width);
    729 void ARGBToUVJRow_SSSE3(const uint8* src_argb, int src_stride_argb,
    730                         uint8* dst_u, uint8* dst_v, int width);
    731 void BGRAToUVRow_SSSE3(const uint8* src_bgra, int src_stride_bgra,
    732                        uint8* dst_u, uint8* dst_v, int width);
    733 void ABGRToUVRow_SSSE3(const uint8* src_abgr, int src_stride_abgr,
    734                        uint8* dst_u, uint8* dst_v, int width);
    735 void RGBAToUVRow_SSSE3(const uint8* src_rgba, int src_stride_rgba,
    736                        uint8* dst_u, uint8* dst_v, int width);
    // UV row prototypes with _Any_AVX2 / _Any_SSSE3 suffixes; same parameter
    // list as the non-_Any_ declarations: (src, src_stride, dst_u, dst_v, width).
    737 void ARGBToUVRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
    738                           uint8* dst_u, uint8* dst_v, int width);
    739 void ARGBToUVJRow_Any_AVX2(const uint8* src_argb, int src_stride_argb,
    740                            uint8* dst_u, uint8* dst_v, int width);
    741 void ARGBToUVRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
    742                            uint8* dst_u, uint8* dst_v, int width);
    743 void ARGBToUVJRow_Any_SSSE3(const uint8* src_argb, int src_stride_argb,
    744                             uint8* dst_u, uint8* dst_v, int width);
    745 void BGRAToUVRow_Any_SSSE3(const uint8* src_bgra, int src_stride_bgra,
    746                            uint8* dst_u, uint8* dst_v, int width);
    747 void ABGRToUVRow_Any_SSSE3(const uint8* src_abgr, int src_stride_abgr,
    748                            uint8* dst_u, uint8* dst_v, int width);
    749 void RGBAToUVRow_Any_SSSE3(const uint8* src_rgba, int src_stride_rgba,
    750                            uint8* dst_u, uint8* dst_v, int width);
    // UV row prototypes with the _Any_NEON suffix. The UV444 and UV411
    // declarations take no stride parameter; every other one takes
    // (src, src_stride, dst_u, dst_v, width).
    751 void ARGBToUV444Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    752                              int width);
    753 void ARGBToUV411Row_Any_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
    754                              int width);
    755 void ARGBToUVRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
    756                           uint8* dst_u, uint8* dst_v, int width);
    757 void ARGBToUVJRow_Any_NEON(const uint8* src_argb, int src_stride_argb,
    758                            uint8* dst_u, uint8* dst_v, int width);
    759 void BGRAToUVRow_Any_NEON(const uint8* src_bgra, int src_stride_bgra,
    760                           uint8* dst_u, uint8* dst_v, int width);
    761 void ABGRToUVRow_Any_NEON(const uint8* src_abgr, int src_stride_abgr,
    762                           uint8* dst_u, uint8* dst_v, int width);
    763 void RGBAToUVRow_Any_NEON(const uint8* src_rgba, int src_stride_rgba,
    764                           uint8* dst_u, uint8* dst_v, int width);
    765 void RGB24ToUVRow_Any_NEON(const uint8* src_rgb24, int src_stride_rgb24,
    766                            uint8* dst_u, uint8* dst_v, int width);
    767 void RAWToUVRow_Any_NEON(const uint8* src_raw, int src_stride_raw,
    768                          uint8* dst_u, uint8* dst_v, int width);
    769 void RGB565ToUVRow_Any_NEON(const uint8* src_rgb565, int src_stride_rgb565,
    770                             uint8* dst_u, uint8* dst_v, int width);
    771 void ARGB1555ToUVRow_Any_NEON(const uint8* src_argb1555,
    772                               int src_stride_argb1555,
    773                               uint8* dst_u, uint8* dst_v, int width);
    774 void ARGB4444ToUVRow_Any_NEON(const uint8* src_argb4444,
    775                               int src_stride_argb4444,
    776                               uint8* dst_u, uint8* dst_v, int width);
    // UV row prototypes with the _C suffix:
    // (const source pointer, int source stride, dst_u, dst_v, int width).
    777 void ARGBToUVRow_C(const uint8* src_argb, int src_stride_argb,
    778                    uint8* dst_u, uint8* dst_v, int width);
    779 void ARGBToUVJRow_C(const uint8* src_argb, int src_stride_argb,
    780                     uint8* dst_u, uint8* dst_v, int width);
    781 void BGRAToUVRow_C(const uint8* src_bgra, int src_stride_bgra,
    782                    uint8* dst_u, uint8* dst_v, int width);
    783 void ABGRToUVRow_C(const uint8* src_abgr, int src_stride_abgr,
    784                    uint8* dst_u, uint8* dst_v, int width);
    785 void RGBAToUVRow_C(const uint8* src_rgba, int src_stride_rgba,
    786                    uint8* dst_u, uint8* dst_v, int width);
    787 void RGB24ToUVRow_C(const uint8* src_rgb24, int src_stride_rgb24,
    788                     uint8* dst_u, uint8* dst_v, int width);
    789 void RAWToUVRow_C(const uint8* src_raw, int src_stride_raw,
    790                   uint8* dst_u, uint8* dst_v, int width);
    791 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
    792                      uint8* dst_u, uint8* dst_v, int width);
    793 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
    794                        uint8* dst_u, uint8* dst_v, int width);
    795 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
    796                        uint8* dst_u, uint8* dst_v, int width);
    797 
    // ARGBToUV444Row prototypes with _SSSE3 / _Any_SSSE3 suffixes. They take a
    // single const source pointer with no stride, dst_u, dst_v and an int width.
    798 void ARGBToUV444Row_SSSE3(const uint8* src_argb,
    799                           uint8* dst_u, uint8* dst_v, int width);
    800 void ARGBToUV444Row_Any_SSSE3(const uint8* src_argb,
    801                               uint8* dst_u, uint8* dst_v, int width);
    802 
    // _C prototypes for ARGBToUV444Row and ARGBToUV411Row:
    // (const src_argb, dst_u, dst_v, int width), no stride parameter.
    803 void ARGBToUV444Row_C(const uint8* src_argb,
    804                       uint8* dst_u, uint8* dst_v, int width);
    805 void ARGBToUV411Row_C(const uint8* src_argb,
    806                       uint8* dst_u, uint8* dst_v, int width);
    807 
    // MirrorRow prototypes: (const src, non-const dst, int width).
    // NOTE(review): MirrorRow_Any_SSE2 is declared below, but this group has no
    // MirrorRow_SSE2 prototype - verify that the _Any_SSE2 symbol is still
    // defined and used.
    808 void MirrorRow_AVX2(const uint8* src, uint8* dst, int width);
    809 void MirrorRow_SSSE3(const uint8* src, uint8* dst, int width);
    810 void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
    811 void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width);
    812 void MirrorRow_C(const uint8* src, uint8* dst, int width);
    813 void MirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
    814 void MirrorRow_Any_SSSE3(const uint8* src, uint8* dst, int width);
    815 void MirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
    816 void MirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
    817 
    // MirrorUVRow prototypes: one const src_uv pointer, separate non-const
    // dst_u / dst_v pointers, and an int width.
    818 void MirrorUVRow_SSSE3(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    819                        int width);
    820 void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    821                       int width);
    822 void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    823                        int width);
    824 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
    825 
    // ARGBMirrorRow prototypes: (const src, non-const dst, int width).
    826 void ARGBMirrorRow_AVX2(const uint8* src, uint8* dst, int width);
    827 void ARGBMirrorRow_SSE2(const uint8* src, uint8* dst, int width);
    828 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width);
    829 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width);
    830 void ARGBMirrorRow_Any_AVX2(const uint8* src, uint8* dst, int width);
    831 void ARGBMirrorRow_Any_SSE2(const uint8* src, uint8* dst, int width);
    832 void ARGBMirrorRow_Any_NEON(const uint8* src, uint8* dst, int width);
    833 
    // SplitUVRow prototypes: one const src_uv pointer in, separate non-const
    // dst_u and dst_v pointers out, plus an int width.
    834 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width);
    835 void SplitUVRow_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    836                      int width);
    837 void SplitUVRow_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    838                      int width);
    839 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    840                      int width);
    841 void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    842                       int width);
    843 void SplitUVRow_Any_SSE2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    844                          int width);
    845 void SplitUVRow_Any_AVX2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    846                          int width);
    847 void SplitUVRow_Any_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    848                          int width);
    849 void SplitUVRow_Any_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
    850                           int width);
    851 
    // MergeUVRow prototypes: two const sources (src_u, src_v), a single
    // non-const dst_uv pointer, and an int width.
    852 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    853                   int width);
    854 void MergeUVRow_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    855                      int width);
    856 void MergeUVRow_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    857                      int width);
    858 void MergeUVRow_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    859                      int width);
    860 void MergeUVRow_Any_SSE2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    861                          int width);
    862 void MergeUVRow_Any_AVX2(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    863                          int width);
    864 void MergeUVRow_Any_NEON(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
    865                          int width);
    866 
    // CopyRow prototypes: (const src, non-const dst, int count). The length
    // parameter is named count here rather than width.
    // NOTE(review): CopyRow_MIPS uses a _MIPS suffix while other declarations
    // in this header use _DSPR2 - confirm the naming is intentional.
    867 void CopyRow_SSE2(const uint8* src, uint8* dst, int count);
    868 void CopyRow_AVX(const uint8* src, uint8* dst, int count);
    869 void CopyRow_ERMS(const uint8* src, uint8* dst, int count);
    870 void CopyRow_NEON(const uint8* src, uint8* dst, int count);
    871 void CopyRow_MIPS(const uint8* src, uint8* dst, int count);
    872 void CopyRow_C(const uint8* src, uint8* dst, int count);
    873 void CopyRow_Any_SSE2(const uint8* src, uint8* dst, int count);
    874 void CopyRow_Any_AVX(const uint8* src, uint8* dst, int count);
    875 void CopyRow_Any_NEON(const uint8* src, uint8* dst, int count);
    876 
    // CopyRow variant whose src and dst are uint16 pointers; only a _C
    // prototype is declared at this point in the header.
    877 void CopyRow_16_C(const uint16* src, uint16* dst, int count);
    878 
    // ARGBCopyAlphaRow prototypes: (const src_argb, non-const dst_argb, width).
    // NOTE(review): by name these copy only the alpha channel between two ARGB
    // rows - confirm against the definitions.
    879 void ARGBCopyAlphaRow_C(const uint8* src_argb, uint8* dst_argb, int width);
    880 void ARGBCopyAlphaRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
    881 void ARGBCopyAlphaRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
    882 void ARGBCopyAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
    883                                int width);
    884 void ARGBCopyAlphaRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
    885                                int width);
    886 
    // ARGBExtractAlphaRow prototypes: (const src_argb, non-const dst_a, width).
    887 void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width);
    888 void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width);
    889 void ARGBExtractAlphaRow_NEON(const uint8* src_argb, uint8* dst_a, int width);
    890 void ARGBExtractAlphaRow_Any_SSE2(const uint8* src_argb, uint8* dst_a,
    891                                   int width);
    892 void ARGBExtractAlphaRow_Any_NEON(const uint8* src_argb, uint8* dst_a,
    893                                   int width);
    894 
    // ARGBCopyYToAlphaRow prototypes: (const src_y, non-const dst_argb, width).
    895 void ARGBCopyYToAlphaRow_C(const uint8* src_y, uint8* dst_argb, int width);
    896 void ARGBCopyYToAlphaRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
    897 void ARGBCopyYToAlphaRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
    898 void ARGBCopyYToAlphaRow_Any_SSE2(const uint8* src_y, uint8* dst_argb,
    899                                   int width);
    900 void ARGBCopyYToAlphaRow_Any_AVX2(const uint8* src_y, uint8* dst_argb,
    901                                   int width);
    902 
    // SetRow prototypes: (non-const dst, uint8 value v8, int count). There is
    // no source pointer.
    903 void SetRow_C(uint8* dst, uint8 v8, int count);
    904 void SetRow_X86(uint8* dst, uint8 v8, int count);
    905 void SetRow_ERMS(uint8* dst, uint8 v8, int count);
    906 void SetRow_NEON(uint8* dst, uint8 v8, int count);
    907 void SetRow_Any_X86(uint8* dst, uint8 v8, int count);
    908 void SetRow_Any_NEON(uint8* dst, uint8 v8, int count);
    909 
    // ARGBSetRow prototypes: (non-const dst_argb, uint32 value v32, int count).
    // NOTE(review): count is presumably in pixels rather than bytes - confirm.
    910 void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int count);
    911 void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count);
    912 void ARGBSetRow_NEON(uint8* dst_argb, uint32 v32, int count);
    913 void ARGBSetRow_Any_NEON(uint8* dst_argb, uint32 v32, int count);
    914 
    915 // ARGBShufflers for BGRAToARGB etc.
    // Each prototype takes (const src_argb, non-const dst_argb, const uint8*
    // shuffler, int width).
    // NOTE(review): the layout of the shuffler table is not visible here -
    // confirm against the definitions.
    916 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
    917                       const uint8* shuffler, int width);
    918 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
    919                          const uint8* shuffler, int width);
    920 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
    921                           const uint8* shuffler, int width);
    922 void ARGBShuffleRow_AVX2(const uint8* src_argb, uint8* dst_argb,
    923                          const uint8* shuffler, int width);
    924 void ARGBShuffleRow_NEON(const uint8* src_argb, uint8* dst_argb,
    925                          const uint8* shuffler, int width);
    926 void ARGBShuffleRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
    927                              const uint8* shuffler, int width);
    928 void ARGBShuffleRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
    929                               const uint8* shuffler, int width);
    930 void ARGBShuffleRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
    931                              const uint8* shuffler, int width);
    932 void ARGBShuffleRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
    933                              const uint8* shuffler, int width);
    934 
    // Prototypes with _SSSE3 / _SSE2 / _AVX2 suffixes for RGB24, RAW, RGB565,
    // ARGB1555 and ARGB4444 sources. Each takes (const src, non-const dst, int
    // width); RAWToRGB24Row names its destination dst_rgb24, the others
    // dst_argb.
    935 void RGB24ToARGBRow_SSSE3(const uint8* src_rgb24, uint8* dst_argb, int width);
    936 void RAWToARGBRow_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
    937 void RAWToRGB24Row_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
    938 void RGB565ToARGBRow_SSE2(const uint8* src_rgb565, uint8* dst_argb, int width);
    939 void ARGB1555ToARGBRow_SSE2(const uint8* src_argb1555, uint8* dst_argb,
    940                             int width);
    941 void ARGB4444ToARGBRow_SSE2(const uint8* src_argb4444, uint8* dst_argb,
    942                             int width);
    943 void RGB565ToARGBRow_AVX2(const uint8* src_rgb565, uint8* dst_argb, int width);
    944 void ARGB1555ToARGBRow_AVX2(const uint8* src_argb1555, uint8* dst_argb,
    945                             int width);
    946 void ARGB4444ToARGBRow_AVX2(const uint8* src_argb4444, uint8* dst_argb,
    947                             int width);
    948 
    // _NEON, _C and _Any_SSSE3 prototypes for the same source formats:
    // (const src, non-const dst, int width).
    // NOTE(review): the _C declarations for RGB565/ARGB1555/ARGB4444 name the
    // source src_rgb / src_argb rather than src_rgb565 / src_argb1555 /
    // src_argb4444 as the SIMD declarations do; the names are cosmetic only.
    949 void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width);
    950 void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width);
    951 void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
    952 void RGB565ToARGBRow_NEON(const uint8* src_rgb565, uint8* dst_argb, int width);
    953 void ARGB1555ToARGBRow_NEON(const uint8* src_argb1555, uint8* dst_argb,
    954                             int width);
    955 void ARGB4444ToARGBRow_NEON(const uint8* src_argb4444, uint8* dst_argb,
    956                             int width);
    957 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width);
    958 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width);
    959 void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width);
    960 void RGB565ToARGBRow_C(const uint8* src_rgb, uint8* dst_argb, int width);
    961 void ARGB1555ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
    962 void ARGB4444ToARGBRow_C(const uint8* src_argb, uint8* dst_argb, int width);
    963 void RGB24ToARGBRow_Any_SSSE3(const uint8* src_rgb24, uint8* dst_argb,
    964                               int width);
    965 void RAWToARGBRow_Any_SSSE3(const uint8* src_raw, uint8* dst_argb, int width);
    966 void RAWToRGB24Row_Any_SSSE3(const uint8* src_raw, uint8* dst_rgb24, int width);
    967 
    // _Any_SSE2 / _Any_AVX2 prototypes for RGB565, ARGB1555 and ARGB4444
    // sources: (const src, non-const dst_argb, int width).
    968 void RGB565ToARGBRow_Any_SSE2(const uint8* src_rgb565, uint8* dst_argb,
    969                               int width);
    970 void ARGB1555ToARGBRow_Any_SSE2(const uint8* src_argb1555, uint8* dst_argb,
    971                                 int width);
    972 void ARGB4444ToARGBRow_Any_SSE2(const uint8* src_argb4444, uint8* dst_argb,
    973                                 int width);
    974 void RGB565ToARGBRow_Any_AVX2(const uint8* src_rgb565, uint8* dst_argb,
    975                               int width);
    976 void ARGB1555ToARGBRow_Any_AVX2(const uint8* src_argb1555, uint8* dst_argb,
    977                                 int width);
    978 void ARGB4444ToARGBRow_Any_AVX2(const uint8* src_argb4444, uint8* dst_argb,
    979                                 int width);
    980 
    // _Any_NEON prototypes for RGB24, RAW, RGB565, ARGB1555 and ARGB4444
    // sources: (const src, non-const dst, int width).
    981 void RGB24ToARGBRow_Any_NEON(const uint8* src_rgb24, uint8* dst_argb,
    982                              int width);
    983 void RAWToARGBRow_Any_NEON(const uint8* src_raw, uint8* dst_argb, int width);
    984 void RAWToRGB24Row_Any_NEON(const uint8* src_raw, uint8* dst_rgb24, int width);
    985 void RGB565ToARGBRow_Any_NEON(const uint8* src_rgb565, uint8* dst_argb,
    986                               int width);
    987 void ARGB1555ToARGBRow_Any_NEON(const uint8* src_argb1555, uint8* dst_argb,
    988                                 int width);
    989 void ARGB4444ToARGBRow_Any_NEON(const uint8* src_argb4444, uint8* dst_argb,
    990                                 int width);
    991 
    // Prototypes taking an ARGB source, with _SSSE3 / _SSE2 suffixes:
    // (const src_argb, non-const dst_rgb, int width). The destination is named
    // dst_rgb for every target format in this group.
    992 void ARGBToRGB24Row_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
    993 void ARGBToRAWRow_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
    994 void ARGBToRGB565Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
    995 void ARGBToARGB1555Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
    996 void ARGBToARGB4444Row_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
    997 
    // ARGBToRGB565DitherRow prototypes: (const src_argb, non-const dst_rgb,
    // const uint32 dither4, int width).
    // NOTE(review): dither4 presumably packs four 8-bit dither values into one
    // uint32 - confirm against the definitions.
    998 void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
    999                              const uint32 dither4, int width);
   1000 void ARGBToRGB565DitherRow_SSE2(const uint8* src_argb, uint8* dst_rgb,
   1001                                 const uint32 dither4, int width);
   1002 void ARGBToRGB565DitherRow_AVX2(const uint8* src_argb, uint8* dst_rgb,
   1003                                 const uint32 dither4, int width);
   1004 
   // _AVX2 prototypes taking an ARGB source:
   // (const src_argb, non-const dst_rgb, int width).
   1005 void ARGBToRGB565Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
   1006 void ARGBToARGB1555Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
   1007 void ARGBToARGB4444Row_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
   1008 
   // _NEON prototypes taking an ARGB source: (const src_argb, non-const
   // dst_rgb, int width). The dither declaration adds a const uint32 dither4
   // parameter.
   1009 void ARGBToRGB24Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1010 void ARGBToRAWRow_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1011 void ARGBToRGB565Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1012 void ARGBToARGB1555Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1013 void ARGBToARGB4444Row_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1014 void ARGBToRGB565DitherRow_NEON(const uint8* src_argb, uint8* dst_rgb,
   1015                                 const uint32 dither4, int width);
   1016 
   // _C prototypes taking an ARGB source:
   // (const src_argb, non-const dst_rgb, int width).
   1017 void ARGBToRGBARow_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1018 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1019 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1020 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1021 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1022 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width);
   1023 
   // J400ToARGBRow prototypes: (const src_y, non-const dst_argb, int width).
   1024 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1025 void J400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1026 void J400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1027 void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
   1028 void J400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1029 void J400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1030 void J400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1031 
   // I444ToARGBRow_C prototype: const src_y / src_u / src_v pointers, a
   // non-const dst_argb pointer, a pointer to const struct YuvConstants
   // (declared outside this part of the header) and an int width.
   1032 void I444ToARGBRow_C(const uint8* src_y,
   1033                      const uint8* src_u,
   1034                      const uint8* src_v,
   1035                      uint8* dst_argb,
   1036                      const struct YuvConstants* yuvconstants,
   1037                      int width);
   // I422ToARGBRow_C prototype: const src_y / src_u / src_v pointers, a
   // non-const dst_argb pointer, a pointer to const struct YuvConstants and an
   // int width. One declaration is sufficient for all callers.
   1038 void I422ToARGBRow_C(const uint8* src_y,
   1039                      const uint8* src_u,
   1040                      const uint8* src_v,
   1041                      uint8* dst_argb,
   1042                      const struct YuvConstants* yuvconstants,
   1043                      int width);
   // I422AlphaToARGBRow_C takes an extra const a_buf pointer and names its
   // planes y_buf / u_buf / v_buf; I411ToARGBRow_C uses the src_y / src_u /
   // src_v naming. Both end with (dst_argb, yuvconstants, width).
   1050 void I422AlphaToARGBRow_C(const uint8* y_buf,
   1051                           const uint8* u_buf,
   1052                           const uint8* v_buf,
   1053                           const uint8* a_buf,
   1054                           uint8* dst_argb,
   1055                           const struct YuvConstants* yuvconstants,
   1056                           int width);
   1057 void I411ToARGBRow_C(const uint8* src_y,
   1058                      const uint8* src_u,
   1059                      const uint8* src_v,
   1060                      uint8* dst_argb,
   1061                      const struct YuvConstants* yuvconstants,
   1062                      int width);
   // _C prototypes for two-pointer (src_y + src_uv) and single-pointer
   // (src_yuy2 / src_uyvy) sources. Each ends with
   // (dst, const struct YuvConstants*, int width).
   // NOTE(review): NV21ToARGBRow_C names its second source src_uv, and
   // NV12ToRGB565Row_C names its destination dst_argb; both names look like
   // carry-overs from NV12ToARGBRow_C - confirm and consider renaming.
   1063 void NV12ToARGBRow_C(const uint8* src_y,
   1064                      const uint8* src_uv,
   1065                      uint8* dst_argb,
   1066                      const struct YuvConstants* yuvconstants,
   1067                      int width);
   1068 void NV12ToRGB565Row_C(const uint8* src_y,
   1069                        const uint8* src_uv,
   1070                        uint8* dst_argb,
   1071                        const struct YuvConstants* yuvconstants,
   1072                        int width);
   1073 void NV21ToARGBRow_C(const uint8* src_y,
   1074                      const uint8* src_uv,
   1075                      uint8* dst_argb,
   1076                      const struct YuvConstants* yuvconstants,
   1077                      int width);
   1078 void YUY2ToARGBRow_C(const uint8* src_yuy2,
   1079                      uint8* dst_argb,
   1080                      const struct YuvConstants* yuvconstants,
   1081                      int width);
   1082 void UYVYToARGBRow_C(const uint8* src_uyvy,
   1083                      uint8* dst_argb,
   1084                      const struct YuvConstants* yuvconstants,
   1085                      int width);
   // _C prototypes with src_y / src_u / src_v sources. The destination
   // parameter is named after the target format (dst_rgba, dst_rgb24,
   // dst_argb4444), followed by yuvconstants and width.
   1086 void I422ToRGBARow_C(const uint8* src_y,
   1087                      const uint8* src_u,
   1088                      const uint8* src_v,
   1089                      uint8* dst_rgba,
   1090                      const struct YuvConstants* yuvconstants,
   1091                      int width);
   1092 void I422ToRGB24Row_C(const uint8* src_y,
   1093                       const uint8* src_u,
   1094                       const uint8* src_v,
   1095                       uint8* dst_rgb24,
   1096                       const struct YuvConstants* yuvconstants,
   1097                       int width);
   1098 void I422ToARGB4444Row_C(const uint8* src_y,
   1099                          const uint8* src_u,
   1100                          const uint8* src_v,
   1101                          uint8* dst_argb4444,
   1102                          const struct YuvConstants* yuvconstants,
   1103                          int width);
   // I422ToARGB1555Row_C prototype: const src_y / src_u / src_v pointers, a
   // non-const destination pointer named for the ARGB1555 target, a pointer to
   // const struct YuvConstants and an int width.
   1104 void I422ToARGB1555Row_C(const uint8* src_y,
   1105                          const uint8* src_u,
   1106                          const uint8* src_v,
   1107                          uint8* dst_argb1555,
   1108                          const struct YuvConstants* yuvconstants,
   1109                          int width);
   // I422ToRGB565Row_C prototype: const src_y / src_u / src_v pointers, a
   // non-const dst_rgb565 pointer, yuvconstants and an int width.
   1110 void I422ToRGB565Row_C(const uint8* src_y,
   1111                        const uint8* src_u,
   1112                        const uint8* src_v,
   1113                        uint8* dst_rgb565,
   1114                        const struct YuvConstants* yuvconstants,
   1115                        int width);
   // I422ToARGBRow_AVX2 prototype: const src_y / src_u / src_v pointers, a
   // non-const dst_argb pointer, a pointer to const struct YuvConstants and an
   // int width. One declaration is sufficient for all callers.
   1116 void I422ToARGBRow_AVX2(const uint8* src_y,
   1117                         const uint8* src_u,
   1118                         const uint8* src_v,
   1119                         uint8* dst_argb,
   1120                         const struct YuvConstants* yuvconstants,
   1121                         int width);
   // I422ToRGBARow_AVX2 prototype: (src_y, src_u, src_v, dst, yuvconstants,
   // width).
   // NOTE(review): the destination is named dst_argb here, whereas
   // I422ToRGBARow_C names it dst_rgba - consider aligning the names.
   1128 void I422ToRGBARow_AVX2(const uint8* src_y,
   1129                         const uint8* src_u,
   1130                         const uint8* src_v,
   1131                         uint8* dst_argb,
   1132                         const struct YuvConstants* yuvconstants,
   1133                         int width);
   // I444ToARGBRow prototypes with _SSSE3 and _AVX2 suffixes: const src_y /
   // src_u / src_v pointers, a non-const dst_argb pointer, a pointer to const
   // struct YuvConstants and an int width. Each is declared exactly once.
   1134 void I444ToARGBRow_SSSE3(const uint8* src_y,
   1135                          const uint8* src_u,
   1136                          const uint8* src_v,
   1137                          uint8* dst_argb,
   1138                          const struct YuvConstants* yuvconstants,
   1139                          int width);
   1140 void I444ToARGBRow_AVX2(const uint8* src_y,
   1141                         const uint8* src_u,
   1142                         const uint8* src_v,
   1143                         uint8* dst_argb,
   1144                         const struct YuvConstants* yuvconstants,
   1145                         int width);
   // I422ToARGBRow_SSSE3 (declared once) followed by the I422AlphaToARGBRow
   // _SSSE3 / _AVX2 prototypes. The alpha variants take an extra const a_buf
   // pointer and name their planes y_buf / u_buf / v_buf; all three end with
   // (dst_argb, yuvconstants, width).
   1158 void I422ToARGBRow_SSSE3(const uint8* src_y,
   1159                          const uint8* src_u,
   1160                          const uint8* src_v,
   1161                          uint8* dst_argb,
   1162                          const struct YuvConstants* yuvconstants,
   1163                          int width);
   1164 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
   1165                               const uint8* u_buf,
   1166                               const uint8* v_buf,
   1167                               const uint8* a_buf,
   1168                               uint8* dst_argb,
   1169                               const struct YuvConstants* yuvconstants,
   1170                               int width);
   1171 void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
   1172                              const uint8* u_buf,
   1173                              const uint8* v_buf,
   1174                              const uint8* a_buf,
   1175                              uint8* dst_argb,
   1176                              const struct YuvConstants* yuvconstants,
   1177                              int width);
   // I411ToARGBRow prototypes with _SSSE3 and _AVX2 suffixes:
   // (src_y, src_u, src_v, dst_argb, yuvconstants, width).
   1184 void I411ToARGBRow_SSSE3(const uint8* src_y,
   1185                          const uint8* src_u,
   1186                          const uint8* src_v,
   1187                          uint8* dst_argb,
   1188                          const struct YuvConstants* yuvconstants,
   1189                          int width);
   1190 void I411ToARGBRow_AVX2(const uint8* src_y,
   1191                         const uint8* src_u,
   1192                         const uint8* src_v,
   1193                         uint8* dst_argb,
   1194                         const struct YuvConstants* yuvconstants,
   1195                         int width);
   // _SSSE3 / _AVX2 prototypes for src_y + src_uv sources:
   // (src_y, src_uv, dst, yuvconstants, width).
   // NOTE(review): the NV12ToRGB565Row_* declarations name their destination
   // dst_argb even though the function name targets RGB565 - consider renaming
   // the parameter.
   1196 void NV12ToARGBRow_SSSE3(const uint8* src_y,
   1197                          const uint8* src_uv,
   1198                          uint8* dst_argb,
   1199                          const struct YuvConstants* yuvconstants,
   1200                          int width);
   1201 void NV12ToARGBRow_AVX2(const uint8* src_y,
   1202                         const uint8* src_uv,
   1203                         uint8* dst_argb,
   1204                         const struct YuvConstants* yuvconstants,
   1205                         int width);
   1206 void NV12ToRGB565Row_SSSE3(const uint8* src_y,
   1207                            const uint8* src_uv,
   1208                            uint8* dst_argb,
   1209                            const struct YuvConstants* yuvconstants,
   1210                            int width);
   1211 void NV12ToRGB565Row_AVX2(const uint8* src_y,
   1212                           const uint8* src_uv,
   1213                           uint8* dst_argb,
   1214                           const struct YuvConstants* yuvconstants,
   1215                           int width);
   1216 void NV21ToARGBRow_SSSE3(const uint8* src_y,
   1217                          const uint8* src_uv,
   1218                          uint8* dst_argb,
   1219                          const struct YuvConstants* yuvconstants,
   1220                          int width);
   1221 void NV21ToARGBRow_AVX2(const uint8* src_y,
   1222                         const uint8* src_uv,
   1223                         uint8* dst_argb,
   1224                         const struct YuvConstants* yuvconstants,
   1225                         int width);
   1226 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
   1227                          uint8* dst_argb,
   1228                          const struct YuvConstants* yuvconstants,
   1229                          int width);
   1230 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
   1231                          uint8* dst_argb,
   1232                          const struct YuvConstants* yuvconstants,
   1233                          int width);
   1234 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
   1235                         uint8* dst_argb,
   1236                         const struct YuvConstants* yuvconstants,
   1237                         int width);
   1238 void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
   1239                         uint8* dst_argb,
   1240                         const struct YuvConstants* yuvconstants,
   1241                         int width);
   1242 void I422ToRGBARow_SSSE3(const uint8* src_y,
   1243                          const uint8* src_u,
   1244                          const uint8* src_v,
   1245                          uint8* dst_rgba,
   1246                          const struct YuvConstants* yuvconstants,
   1247                          int width);
   1248 void I422ToARGB4444Row_SSSE3(const uint8* src_y,
   1249                              const uint8* src_u,
   1250                              const uint8* src_v,
   1251                              uint8* dst_argb,
   1252                              const struct YuvConstants* yuvconstants,
   1253                              int width);
   1254 void I422ToARGB4444Row_AVX2(const uint8* src_y,
   1255                             const uint8* src_u,
   1256                             const uint8* src_v,
   1257                             uint8* dst_argb,
   1258                             const struct YuvConstants* yuvconstants,
   1259                             int width);
   1260 void I422ToARGB1555Row_SSSE3(const uint8* src_y,
   1261                              const uint8* src_u,
   1262                              const uint8* src_v,
   1263                              uint8* dst_argb,
   1264                              const struct YuvConstants* yuvconstants,
   1265                              int width);
   1266 void I422ToARGB1555Row_AVX2(const uint8* src_y,
   1267                             const uint8* src_u,
   1268                             const uint8* src_v,
   1269                             uint8* dst_argb,
   1270                             const struct YuvConstants* yuvconstants,
   1271                             int width);
   1272 void I422ToRGB565Row_SSSE3(const uint8* src_y,
   1273                            const uint8* src_u,
   1274                            const uint8* src_v,
   1275                            uint8* dst_argb,
   1276                            const struct YuvConstants* yuvconstants,
   1277                            int width);
   1278 void I422ToRGB565Row_AVX2(const uint8* src_y,
   1279                           const uint8* src_u,
   1280                           const uint8* src_v,
   1281                           uint8* dst_argb,
   1282                           const struct YuvConstants* yuvconstants,
   1283                           int width);
   1284 void I422ToRGB24Row_SSSE3(const uint8* src_y,
   1285                           const uint8* src_u,
   1286                           const uint8* src_v,
   1287                           uint8* dst_rgb24,
   1288                           const struct YuvConstants* yuvconstants,
   1289                           int width);
   1290 void I422ToRGB24Row_AVX2(const uint8* src_y,
   1291                          const uint8* src_u,
   1292                          const uint8* src_v,
   1293                          uint8* dst_rgb24,
   1294                          const struct YuvConstants* yuvconstants,
   1295                          int width);
   1296 void I422ToARGBRow_Any_AVX2(const uint8* src_y,
   1297                             const uint8* src_u,
   1298                             const uint8* src_v,
   1299                             uint8* dst_argb,
   1300                             const struct YuvConstants* yuvconstants,
   1301                             int width);
        // NOTE(review): the _Any_ infix presumably marks wrappers around the
        // plain SSSE3/AVX2 row functions that lift a width restriction --
        // confirm against the definitions; nothing in these prototypes shows it.
        // NOTE(review): this RGBA prototype names its destination dst_argb,
        // whereas I422ToRGBARow_Any_SSSE3 further down uses dst_rgba.
   1302 void I422ToRGBARow_Any_AVX2(const uint8* src_y,
   1303                             const uint8* src_u,
   1304                             const uint8* src_v,
   1305                             uint8* dst_argb,
   1306                             const struct YuvConstants* yuvconstants,
   1307                             int width);
   1308 void I444ToARGBRow_Any_SSSE3(const uint8* src_y,
   1309                              const uint8* src_u,
   1310                              const uint8* src_v,
   1311                              uint8* dst_argb,
   1312                              const struct YuvConstants* yuvconstants,
   1313                              int width);
   1314 void I444ToARGBRow_Any_AVX2(const uint8* src_y,
   1315                             const uint8* src_u,
   1316                             const uint8* src_v,
   1317                             uint8* dst_argb,
   1318                             const struct YuvConstants* yuvconstants,
   1319                             int width);
   1320 void I422ToARGBRow_Any_SSSE3(const uint8* src_y,
   1321                              const uint8* src_u,
   1322                              const uint8* src_v,
   1323                              uint8* dst_argb,
   1324                              const struct YuvConstants* yuvconstants,
   1325                              int width);
        // I422 plus a fourth source pointer (a_buf). These two prototypes use
        // the y_buf/u_buf/v_buf naming instead of src_y/src_u/src_v.
   1326 void I422AlphaToARGBRow_Any_SSSE3(const uint8* y_buf,
   1327                                   const uint8* u_buf,
   1328                                   const uint8* v_buf,
   1329                                   const uint8* a_buf,
   1330                                   uint8* dst_argb,
   1331                                   const struct YuvConstants* yuvconstants,
   1332                                   int width);
   1333 void I422AlphaToARGBRow_Any_AVX2(const uint8* y_buf,
   1334                                  const uint8* u_buf,
   1335                                  const uint8* v_buf,
   1336                                  const uint8* a_buf,
   1337                                  uint8* dst_argb,
   1338                                  const struct YuvConstants* yuvconstants,
   1339                                  int width);
   1340 void I411ToARGBRow_Any_SSSE3(const uint8* src_y,
   1341                              const uint8* src_u,
   1342                              const uint8* src_v,
   1343                              uint8* dst_argb,
   1344                              const struct YuvConstants* yuvconstants,
   1345                              int width);
   1346 void I411ToARGBRow_Any_AVX2(const uint8* src_y,
   1347                             const uint8* src_u,
   1348                             const uint8* src_v,
   1349                             uint8* dst_argb,
   1350                             const struct YuvConstants* yuvconstants,
   1351                             int width);
        // NV12 / NV21 sources: src_y plus one chroma pointer, named src_uv for
        // NV12 and src_vu for NV21.
        // NOTE(review): the RGB565 prototypes name their destination dst_argb.
   1352 void NV12ToARGBRow_Any_SSSE3(const uint8* src_y,
   1353                              const uint8* src_uv,
   1354                              uint8* dst_argb,
   1355                              const struct YuvConstants* yuvconstants,
   1356                              int width);
   1357 void NV12ToARGBRow_Any_AVX2(const uint8* src_y,
   1358                             const uint8* src_uv,
   1359                             uint8* dst_argb,
   1360                             const struct YuvConstants* yuvconstants,
   1361                             int width);
   1362 void NV21ToARGBRow_Any_SSSE3(const uint8* src_y,
   1363                              const uint8* src_vu,
   1364                              uint8* dst_argb,
   1365                              const struct YuvConstants* yuvconstants,
   1366                              int width);
   1367 void NV21ToARGBRow_Any_AVX2(const uint8* src_y,
   1368                             const uint8* src_vu,
   1369                             uint8* dst_argb,
   1370                             const struct YuvConstants* yuvconstants,
   1371                             int width);
   1372 void NV12ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1373                                const uint8* src_uv,
   1374                                uint8* dst_argb,
   1375                                const struct YuvConstants* yuvconstants,
   1376                                int width);
   1377 void NV12ToRGB565Row_Any_AVX2(const uint8* src_y,
   1378                               const uint8* src_uv,
   1379                               uint8* dst_argb,
   1380                               const struct YuvConstants* yuvconstants,
   1381                               int width);
        // YUY2 / UYVY sources: a single source pointer.
   1382 void YUY2ToARGBRow_Any_SSSE3(const uint8* src_yuy2,
   1383                              uint8* dst_argb,
   1384                              const struct YuvConstants* yuvconstants,
   1385                              int width);
   1386 void UYVYToARGBRow_Any_SSSE3(const uint8* src_uyvy,
   1387                              uint8* dst_argb,
   1388                              const struct YuvConstants* yuvconstants,
   1389                              int width);
   1390 void YUY2ToARGBRow_Any_AVX2(const uint8* src_yuy2,
   1391                             uint8* dst_argb,
   1392                             const struct YuvConstants* yuvconstants,
   1393                             int width);
   1394 void UYVYToARGBRow_Any_AVX2(const uint8* src_uyvy,
   1395                             uint8* dst_argb,
   1396                             const struct YuvConstants* yuvconstants,
   1397                             int width);
        // I422 sources with other destination formats.
        // NOTE(review): the ARGB4444 / ARGB1555 / RGB565 prototypes below name
        // the destination dst_rgba and the RGB24 prototypes name it dst_argb;
        // neither matches the format in the function name.
   1398 void I422ToRGBARow_Any_SSSE3(const uint8* src_y,
   1399                              const uint8* src_u,
   1400                              const uint8* src_v,
   1401                              uint8* dst_rgba,
   1402                              const struct YuvConstants* yuvconstants,
   1403                              int width);
   1404 void I422ToARGB4444Row_Any_SSSE3(const uint8* src_y,
   1405                                  const uint8* src_u,
   1406                                  const uint8* src_v,
   1407                                  uint8* dst_rgba,
   1408                                  const struct YuvConstants* yuvconstants,
   1409                                  int width);
   1410 void I422ToARGB4444Row_Any_AVX2(const uint8* src_y,
   1411                                 const uint8* src_u,
   1412                                 const uint8* src_v,
   1413                                 uint8* dst_rgba,
   1414                                 const struct YuvConstants* yuvconstants,
   1415                                 int width);
   1416 void I422ToARGB1555Row_Any_SSSE3(const uint8* src_y,
   1417                                  const uint8* src_u,
   1418                                  const uint8* src_v,
   1419                                  uint8* dst_rgba,
   1420                                  const struct YuvConstants* yuvconstants,
   1421                                  int width);
   1422 void I422ToARGB1555Row_Any_AVX2(const uint8* src_y,
   1423                                 const uint8* src_u,
   1424                                 const uint8* src_v,
   1425                                 uint8* dst_rgba,
   1426                                 const struct YuvConstants* yuvconstants,
   1427                                 int width);
   1428 void I422ToRGB565Row_Any_SSSE3(const uint8* src_y,
   1429                                const uint8* src_u,
   1430                                const uint8* src_v,
   1431                                uint8* dst_rgba,
   1432                                const struct YuvConstants* yuvconstants,
   1433                                int width);
   1434 void I422ToRGB565Row_Any_AVX2(const uint8* src_y,
   1435                               const uint8* src_u,
   1436                               const uint8* src_v,
   1437                               uint8* dst_rgba,
   1438                               const struct YuvConstants* yuvconstants,
   1439                               int width);
   1440 void I422ToRGB24Row_Any_SSSE3(const uint8* src_y,
   1441                               const uint8* src_u,
   1442                               const uint8* src_v,
   1443                               uint8* dst_argb,
   1444                               const struct YuvConstants* yuvconstants,
   1445                               int width);
   1446 void I422ToRGB24Row_Any_AVX2(const uint8* src_y,
   1447                              const uint8* src_u,
   1448                              const uint8* src_v,
   1449                              uint8* dst_argb,
   1450                              const struct YuvConstants* yuvconstants,
   1451                              int width);
   1452 
        // I400ToARGBRow family: one source pointer (src_y), one destination
        // pointer (dst_argb) and a width. Unlike the YUV-to-RGB prototypes
        // above, no YuvConstants argument is taken. Declared with the _C,
        // _SSE2, _AVX2 and _NEON suffixes, plus an _Any_ form for each of the
        // three SIMD suffixes.
   1453 void I400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width);
   1454 void I400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1455 void I400ToARGBRow_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1456 void I400ToARGBRow_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1457 void I400ToARGBRow_Any_SSE2(const uint8* src_y, uint8* dst_argb, int width);
   1458 void I400ToARGBRow_Any_AVX2(const uint8* src_y, uint8* dst_argb, int width);
   1459 void I400ToARGBRow_Any_NEON(const uint8* src_y, uint8* dst_argb, int width);
   1460 
   1461 // ARGB preattenuated alpha blend.
        // Each prototype takes two source pointers (src_argb, src_argb1), one
        // destination pointer (dst_argb) and a width. Only the _SSSE3, _NEON
        // and _C forms are declared here; this group has no _Any_ prototypes.
   1462 void ARGBBlendRow_SSSE3(const uint8* src_argb, const uint8* src_argb1,
   1463                         uint8* dst_argb, int width);
   1464 void ARGBBlendRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1465                        uint8* dst_argb, int width);
   1466 void ARGBBlendRow_C(const uint8* src_argb, const uint8* src_argb1,
   1467                     uint8* dst_argb, int width);
   1468 
   1469 // Unattenuated planar alpha blend.
        // Each prototype takes two source pointers (src0, src1), a separate
        // alpha pointer, one destination pointer (dst) and a width. Declared
        // with the _SSSE3, _AVX2 and _C suffixes, plus _Any_ forms for the two
        // SIMD suffixes.
   1470 void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
   1471                          const uint8* alpha, uint8* dst, int width);
   1472 void BlendPlaneRow_Any_SSSE3(const uint8* src0, const uint8* src1,
   1473                              const uint8* alpha, uint8* dst, int width);
   1474 void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
   1475                         const uint8* alpha, uint8* dst, int width);
   1476 void BlendPlaneRow_Any_AVX2(const uint8* src0, const uint8* src1,
   1477                             const uint8* alpha, uint8* dst, int width);
   1478 void BlendPlaneRow_C(const uint8* src0, const uint8* src1,
   1479                      const uint8* alpha, uint8* dst, int width);
   1480 
   1481 // ARGB multiply images. Same API as Blend, but these require
   1482 // pointer and width alignment for SSE2.
        // Parameters: two source pointers (src_argb, src_argb1), one
        // destination pointer (dst_argb) and a width.
        // NOTE(review): the alignment requirement stated above cannot be seen
        // in these signatures, and an _Any_SSE2 form is declared alongside the
        // plain SSE2 one -- confirm the remark is still accurate.
   1483 void ARGBMultiplyRow_C(const uint8* src_argb, const uint8* src_argb1,
   1484                        uint8* dst_argb, int width);
   1485 void ARGBMultiplyRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1486                           uint8* dst_argb, int width);
   1487 void ARGBMultiplyRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1488                               uint8* dst_argb, int width);
   1489 void ARGBMultiplyRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1490                           uint8* dst_argb, int width);
   1491 void ARGBMultiplyRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1492                               uint8* dst_argb, int width);
   1493 void ARGBMultiplyRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1494                           uint8* dst_argb, int width);
   1495 void ARGBMultiplyRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1496                               uint8* dst_argb, int width);
   1497 
   1498 // ARGB add images.
        // Parameters: two source pointers (src_argb, src_argb1), one
        // destination pointer (dst_argb) and a width. Declared with the _C,
        // _SSE2, _AVX2 and _NEON suffixes, plus an _Any_ form for each SIMD
        // suffix.
   1499 void ARGBAddRow_C(const uint8* src_argb, const uint8* src_argb1,
   1500                   uint8* dst_argb, int width);
   1501 void ARGBAddRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1502                      uint8* dst_argb, int width);
   1503 void ARGBAddRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1504                          uint8* dst_argb, int width);
   1505 void ARGBAddRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1506                      uint8* dst_argb, int width);
   1507 void ARGBAddRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1508                          uint8* dst_argb, int width);
   1509 void ARGBAddRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1510                      uint8* dst_argb, int width);
   1511 void ARGBAddRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1512                          uint8* dst_argb, int width);
   1513 
   1514 // ARGB subtract images. Same API as Blend, but these require
   1515 // pointer and width alignment for SSE2.
        // Parameters: two source pointers (src_argb, src_argb1), one
        // destination pointer (dst_argb) and a width.
        // NOTE(review): the alignment requirement stated above cannot be seen
        // in these signatures, and an _Any_SSE2 form is declared alongside the
        // plain SSE2 one -- confirm the remark is still accurate.
   1516 void ARGBSubtractRow_C(const uint8* src_argb, const uint8* src_argb1,
   1517                        uint8* dst_argb, int width);
   1518 void ARGBSubtractRow_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1519                           uint8* dst_argb, int width);
   1520 void ARGBSubtractRow_Any_SSE2(const uint8* src_argb, const uint8* src_argb1,
   1521                               uint8* dst_argb, int width);
   1522 void ARGBSubtractRow_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1523                           uint8* dst_argb, int width);
   1524 void ARGBSubtractRow_Any_AVX2(const uint8* src_argb, const uint8* src_argb1,
   1525                               uint8* dst_argb, int width);
   1526 void ARGBSubtractRow_NEON(const uint8* src_argb, const uint8* src_argb1,
   1527                           uint8* dst_argb, int width);
   1528 void ARGBSubtractRow_Any_NEON(const uint8* src_argb, const uint8* src_argb1,
   1529                               uint8* dst_argb, int width);
   1530 
        // _Any_ prototypes for the ARGBTo<format>Row converters (RGB24, RAW,
        // RGB565, ARGB1555, ARGB4444 per the function names), grouped by
        // instruction-set suffix. Each takes a source pointer (src_argb), a
        // destination pointer (dst_rgb) and a width; the RGB565Dither forms
        // additionally take a `const uint32 dither4` value before the width.
   1531 void ARGBToRGB24Row_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
   1532 void ARGBToRAWRow_Any_SSSE3(const uint8* src_argb, uint8* dst_rgb, int width);
   1533 void ARGBToRGB565Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb, int width);
   1534 void ARGBToARGB1555Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
   1535                                 int width);
   1536 void ARGBToARGB4444Row_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
   1537                                 int width);
   1538 
   1539 void ARGBToRGB565DitherRow_Any_SSE2(const uint8* src_argb, uint8* dst_rgb,
   1540                                     const uint32 dither4, int width);
   1541 void ARGBToRGB565DitherRow_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
   1542                                     const uint32 dither4, int width);
   1543 
   1544 void ARGBToRGB565Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb, int width);
   1545 void ARGBToARGB1555Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
   1546                                 int width);
   1547 void ARGBToARGB4444Row_Any_AVX2(const uint8* src_argb, uint8* dst_rgb,
   1548                                 int width);
   1549 
   1550 void ARGBToRGB24Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1551 void ARGBToRAWRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1552 void ARGBToRGB565Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb, int width);
   1553 void ARGBToARGB1555Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
   1554                                 int width);
   1555 void ARGBToARGB4444Row_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
   1556                                 int width);
   1557 void ARGBToRGB565DitherRow_Any_NEON(const uint8* src_argb, uint8* dst_rgb,
   1558                                     const uint32 dither4, int width);
   1559 
        // NEON _Any_ prototypes for the YUV-to-RGB row converters, followed by
        // the DSPR2 I422-to-ARGB prototype. Every function takes its source
        // pointer(s), one destination pointer, a YuvConstants pointer and a
        // width.
        // NOTE(review): most prototypes here name the destination dst_argb
        // even when the function name indicates another format (RGBA, RGB24,
        // ARGB4444, ARGB1555, RGB565); confirm against the definitions.
   1560 void I444ToARGBRow_Any_NEON(const uint8* src_y,
   1561                             const uint8* src_u,
   1562                             const uint8* src_v,
   1563                             uint8* dst_argb,
   1564                             const struct YuvConstants* yuvconstants,
   1565                             int width);
   1566 void I422ToARGBRow_Any_NEON(const uint8* src_y,
   1567                             const uint8* src_u,
   1568                             const uint8* src_v,
   1569                             uint8* dst_argb,
   1570                             const struct YuvConstants* yuvconstants,
   1571                             int width);
        // I422 plus a fourth source pointer (src_a).
   1572 void I422AlphaToARGBRow_Any_NEON(const uint8* src_y,
   1573                                  const uint8* src_u,
   1574                                  const uint8* src_v,
   1575                                  const uint8* src_a,
   1576                                  uint8* dst_argb,
   1577                                  const struct YuvConstants* yuvconstants,
   1578                                  int width);
   1579 void I411ToARGBRow_Any_NEON(const uint8* src_y,
   1580                             const uint8* src_u,
   1581                             const uint8* src_v,
   1582                             uint8* dst_argb,
   1583                             const struct YuvConstants* yuvconstants,
   1584                             int width);
   1585 void I422ToRGBARow_Any_NEON(const uint8* src_y,
   1586                             const uint8* src_u,
   1587                             const uint8* src_v,
   1588                             uint8* dst_argb,
   1589                             const struct YuvConstants* yuvconstants,
   1590                             int width);
   1591 void I422ToRGB24Row_Any_NEON(const uint8* src_y,
   1592                              const uint8* src_u,
   1593                              const uint8* src_v,
   1594                              uint8* dst_argb,
   1595                              const struct YuvConstants* yuvconstants,
   1596                              int width);
   1597 void I422ToARGB4444Row_Any_NEON(const uint8* src_y,
   1598                                 const uint8* src_u,
   1599                                 const uint8* src_v,
   1600                                 uint8* dst_argb,
   1601                                 const struct YuvConstants* yuvconstants,
   1602                                 int width);
   1603 void I422ToARGB1555Row_Any_NEON(const uint8* src_y,
   1604                                 const uint8* src_u,
   1605                                 const uint8* src_v,
   1606                                 uint8* dst_argb,
   1607                                 const struct YuvConstants* yuvconstants,
   1608                                 int width);
   1609 void I422ToRGB565Row_Any_NEON(const uint8* src_y,
   1610                               const uint8* src_u,
   1611                               const uint8* src_v,
   1612                               uint8* dst_argb,
   1613                               const struct YuvConstants* yuvconstants,
   1614                               int width);
        // NV12 / NV21 sources: src_y plus one chroma pointer (src_uv / src_vu).
   1615 void NV12ToARGBRow_Any_NEON(const uint8* src_y,
   1616                             const uint8* src_uv,
   1617                             uint8* dst_argb,
   1618                             const struct YuvConstants* yuvconstants,
   1619                             int width);
   1620 void NV21ToARGBRow_Any_NEON(const uint8* src_y,
   1621                             const uint8* src_vu,
   1622                             uint8* dst_argb,
   1623                             const struct YuvConstants* yuvconstants,
   1624                             int width);
   1625 void NV12ToRGB565Row_Any_NEON(const uint8* src_y,
   1626                               const uint8* src_uv,
   1627                               uint8* dst_argb,
   1628                               const struct YuvConstants* yuvconstants,
   1629                               int width);
        // YUY2 / UYVY sources: a single source pointer.
   1630 void YUY2ToARGBRow_Any_NEON(const uint8* src_yuy2,
   1631                             uint8* dst_argb,
   1632                             const struct YuvConstants* yuvconstants,
   1633                             int width);
   1634 void UYVYToARGBRow_Any_NEON(const uint8* src_uyvy,
   1635                             uint8* dst_argb,
   1636                             const struct YuvConstants* yuvconstants,
   1637                             int width);
        // DSPR2 form of the I422-to-ARGB row converter; same parameter list as
        // the other I422ToARGBRow_* prototypes. Declared exactly once.
   1638 void I422ToARGBRow_DSPR2(const uint8* src_y,
   1639                          const uint8* src_u,
   1640                          const uint8* src_v,
   1641                          uint8* dst_argb,
   1642                          const struct YuvConstants* yuvconstants,
   1643                          int width);
   1650 
        // YUY2 source row functions. For each suffix three prototypes exist:
        //   YUY2ToYRow     - src_yuy2 -> dst_y
        //   YUY2ToUVRow    - src_yuy2 plus a stride (stride_yuy2) -> dst_u, dst_v
        //   YUY2ToUV422Row - src_yuy2 (no stride argument)      -> dst_u, dst_v
        // NOTE(review): the stride argument presumably lets the UVRow form
        // read an adjacent row as well -- confirm against the definitions.
   1651 void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
   1652 void YUY2ToUVRow_AVX2(const uint8* src_yuy2, int stride_yuy2,
   1653                       uint8* dst_u, uint8* dst_v, int width);
   1654 void YUY2ToUV422Row_AVX2(const uint8* src_yuy2,
   1655                          uint8* dst_u, uint8* dst_v, int width);
   1656 void YUY2ToYRow_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
   1657 void YUY2ToUVRow_SSE2(const uint8* src_yuy2, int stride_yuy2,
   1658                       uint8* dst_u, uint8* dst_v, int width);
   1659 void YUY2ToUV422Row_SSE2(const uint8* src_yuy2,
   1660                          uint8* dst_u, uint8* dst_v, int width);
   1661 void YUY2ToYRow_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
   1662 void YUY2ToUVRow_NEON(const uint8* src_yuy2, int stride_yuy2,
   1663                       uint8* dst_u, uint8* dst_v, int width);
   1664 void YUY2ToUV422Row_NEON(const uint8* src_yuy2,
   1665                          uint8* dst_u, uint8* dst_v, int width);
   1666 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width);
   1667 void YUY2ToUVRow_C(const uint8* src_yuy2, int stride_yuy2,
   1668                    uint8* dst_u, uint8* dst_v, int width);
   1669 void YUY2ToUV422Row_C(const uint8* src_yuy2,
   1670                       uint8* dst_u, uint8* dst_v, int width);
   1671 void YUY2ToYRow_Any_AVX2(const uint8* src_yuy2, uint8* dst_y, int width);
   1672 void YUY2ToUVRow_Any_AVX2(const uint8* src_yuy2, int stride_yuy2,
   1673                           uint8* dst_u, uint8* dst_v, int width);
   1674 void YUY2ToUV422Row_Any_AVX2(const uint8* src_yuy2,
   1675                              uint8* dst_u, uint8* dst_v, int width);
   1676 void YUY2ToYRow_Any_SSE2(const uint8* src_yuy2, uint8* dst_y, int width);
   1677 void YUY2ToUVRow_Any_SSE2(const uint8* src_yuy2, int stride_yuy2,
   1678                           uint8* dst_u, uint8* dst_v, int width);
   1679 void YUY2ToUV422Row_Any_SSE2(const uint8* src_yuy2,
   1680                              uint8* dst_u, uint8* dst_v, int width);
   1681 void YUY2ToYRow_Any_NEON(const uint8* src_yuy2, uint8* dst_y, int width);
   1682 void YUY2ToUVRow_Any_NEON(const uint8* src_yuy2, int stride_yuy2,
   1683                           uint8* dst_u, uint8* dst_v, int width);
   1684 void YUY2ToUV422Row_Any_NEON(const uint8* src_yuy2,
   1685                              uint8* dst_u, uint8* dst_v, int width);
        // UYVY source row functions: the same three shapes as the YUY2 group
        // above (YRow, UVRow with stride_uyvy, UV422Row without a stride).
        // Each AVX2 / SSE2 / NEON prototype is declared exactly once.
   1686 void UYVYToYRow_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
   1687 void UYVYToUVRow_AVX2(const uint8* src_uyvy, int stride_uyvy,
   1688                       uint8* dst_u, uint8* dst_v, int width);
   1689 void UYVYToUV422Row_AVX2(const uint8* src_uyvy,
   1690                          uint8* dst_u, uint8* dst_v, int width);
   1691 void UYVYToYRow_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
   1692 void UYVYToUVRow_SSE2(const uint8* src_uyvy, int stride_uyvy,
   1693                       uint8* dst_u, uint8* dst_v, int width);
   1694 void UYVYToUV422Row_SSE2(const uint8* src_uyvy,
   1695                          uint8* dst_u, uint8* dst_v, int width);
   1701 void UYVYToYRow_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
   1702 void UYVYToUVRow_NEON(const uint8* src_uyvy, int stride_uyvy,
   1703                       uint8* dst_u, uint8* dst_v, int width);
   1704 void UYVYToUV422Row_NEON(const uint8* src_uyvy,
   1705                          uint8* dst_u, uint8* dst_v, int width);
   1706 
        // UYVY source row functions: the _C forms followed by the _Any_ forms
        // for the AVX2, SSE2 and NEON suffixes. Three shapes per suffix:
        //   UYVYToYRow     - src_uyvy -> dst_y
        //   UYVYToUVRow    - src_uyvy plus a stride (stride_uyvy) -> dst_u, dst_v
        //   UYVYToUV422Row - src_uyvy (no stride argument)      -> dst_u, dst_v
   1707 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width);
   1708 void UYVYToUVRow_C(const uint8* src_uyvy, int stride_uyvy,
   1709                    uint8* dst_u, uint8* dst_v, int width);
   1710 void UYVYToUV422Row_C(const uint8* src_uyvy,
   1711                       uint8* dst_u, uint8* dst_v, int width);
   1712 void UYVYToYRow_Any_AVX2(const uint8* src_uyvy, uint8* dst_y, int width);
   1713 void UYVYToUVRow_Any_AVX2(const uint8* src_uyvy, int stride_uyvy,
   1714                           uint8* dst_u, uint8* dst_v, int width);
   1715 void UYVYToUV422Row_Any_AVX2(const uint8* src_uyvy,
   1716                              uint8* dst_u, uint8* dst_v, int width);
   1717 void UYVYToYRow_Any_SSE2(const uint8* src_uyvy, uint8* dst_y, int width);
   1718 void UYVYToUVRow_Any_SSE2(const uint8* src_uyvy, int stride_uyvy,
   1719                           uint8* dst_u, uint8* dst_v, int width);
   1720 void UYVYToUV422Row_Any_SSE2(const uint8* src_uyvy,
   1721                              uint8* dst_u, uint8* dst_v, int width);
   1722 void UYVYToYRow_Any_NEON(const uint8* src_uyvy, uint8* dst_y, int width);
   1723 void UYVYToUVRow_Any_NEON(const uint8* src_uyvy, int stride_uyvy,
   1724                           uint8* dst_u, uint8* dst_v, int width);
   1725 void UYVYToUV422Row_Any_NEON(const uint8* src_uyvy,
   1726                              uint8* dst_u, uint8* dst_v, int width);
   1727 
   // Row functions that take three separate source buffers (src_y, src_u,
   // src_v) and write one destination buffer: dst_yuy2 for I422ToYUY2Row and
   // dst_uyvy for I422ToUYVYRow. The _C suffix presumably marks the portable
   // C versions -- confirm in the implementation file.
   1728 void I422ToYUY2Row_C(const uint8* src_y,
   1729                      const uint8* src_u,
   1730                      const uint8* src_v,
   1731                      uint8* dst_yuy2, int width);
   1732 void I422ToUYVYRow_C(const uint8* src_y,
   1733                      const uint8* src_u,
   1734                      const uint8* src_v,
   1735                      uint8* dst_uyvy, int width);
   // SSE2 and _Any_SSE2 declarations of I422ToYUY2Row / I422ToUYVYRow. The
   // parameter lists are identical to the _C declarations: three source
   // buffers, one destination buffer and a width.
   1736 void I422ToYUY2Row_SSE2(const uint8* src_y,
   1737                         const uint8* src_u,
   1738                         const uint8* src_v,
   1739                         uint8* dst_yuy2, int width);
   1740 void I422ToUYVYRow_SSE2(const uint8* src_y,
   1741                         const uint8* src_u,
   1742                         const uint8* src_v,
   1743                         uint8* dst_uyvy, int width);
   1744 void I422ToYUY2Row_Any_SSE2(const uint8* src_y,
   1745                             const uint8* src_u,
   1746                             const uint8* src_v,
   1747                             uint8* dst_yuy2, int width);
   1748 void I422ToUYVYRow_Any_SSE2(const uint8* src_y,
   1749                             const uint8* src_u,
   1750                             const uint8* src_v,
   1751                             uint8* dst_uyvy, int width);
   // NEON and _Any_NEON declarations of I422ToYUY2Row / I422ToUYVYRow. The
   // parameter lists are identical to the _C declarations: three source
   // buffers, one destination buffer and a width.
   1752 void I422ToYUY2Row_NEON(const uint8* src_y,
   1753                         const uint8* src_u,
   1754                         const uint8* src_v,
   1755                         uint8* dst_yuy2, int width);
   1756 void I422ToUYVYRow_NEON(const uint8* src_y,
   1757                         const uint8* src_u,
   1758                         const uint8* src_v,
   1759                         uint8* dst_uyvy, int width);
   1760 void I422ToYUY2Row_Any_NEON(const uint8* src_y,
   1761                             const uint8* src_u,
   1762                             const uint8* src_v,
   1763                             uint8* dst_yuy2, int width);
   1764 void I422ToUYVYRow_Any_NEON(const uint8* src_y,
   1765                             const uint8* src_u,
   1766                             const uint8* src_v,
   1767                             uint8* dst_uyvy, int width);
   1768 
   1769 // Effects related row functions.
   // ARGBAttenuateRow: reads src_argb and writes dst_argb; every variant
   // shares the (src_argb, dst_argb, width) signature.
   // NOTE(review): an _Any_SSE2 variant is declared below, but this group has
   // no plain ARGBAttenuateRow_SSE2 declaration -- confirm the _Any_SSE2
   // symbol is still defined, or remove the stale declaration.
   1770 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1771 void ARGBAttenuateRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
   1772 void ARGBAttenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
   1773 void ARGBAttenuateRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
   1774 void ARGBAttenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
   1775                                int width);
   1776 void ARGBAttenuateRow_Any_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1777                                 int width);
   1778 void ARGBAttenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
   1779                                int width);
   1780 void ARGBAttenuateRow_Any_NEON(const uint8* src_argb, uint8* dst_argb,
   1781                                int width);
   1782 
   1783 // Inverse table for unattenuate, shared by C and SSE2.
   // The table has 256 uint32 entries and is defined in another translation
   // unit (extern). Presumably indexed by an 8-bit alpha value -- confirm.
   1784 extern const uint32 fixed_invtbl8[256];
   // ARGBUnattenuateRow: reads src_argb and writes dst_argb. C, SSE2 and AVX2
   // variants are declared, plus _Any_ forms for SSE2 and AVX2.
   1785 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1786 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width);
   1787 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, int width);
   1788 void ARGBUnattenuateRow_Any_SSE2(const uint8* src_argb, uint8* dst_argb,
   1789                                  int width);
   1790 void ARGBUnattenuateRow_Any_AVX2(const uint8* src_argb, uint8* dst_argb,
   1791                                  int width);
   1792 
   // ARGBGrayRow: reads src_argb and writes dst_argb. The C, SSSE3 and NEON
   // declarations share one signature.
   1793 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width);
   1794 void ARGBGrayRow_SSSE3(const uint8* src_argb, uint8* dst_argb, int width);
   1795 void ARGBGrayRow_NEON(const uint8* src_argb, uint8* dst_argb, int width);
   1796 
   // ARGBSepiaRow takes only a mutable dst_argb pointer and no source
   // pointer, so it presumably transforms the row in place -- confirm in the
   // implementation.
   1797 void ARGBSepiaRow_C(uint8* dst_argb, int width);
   1798 void ARGBSepiaRow_SSSE3(uint8* dst_argb, int width);
   1799 void ARGBSepiaRow_NEON(uint8* dst_argb, int width);
   1800 
   // ARGBColorMatrixRow: reads src_argb and writes dst_argb using the
   // caller-supplied signed 8-bit coefficients in matrix_argb. The number and
   // layout of the coefficients are not visible in this header.
   1801 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
   1802                           const int8* matrix_argb, int width);
   1803 void ARGBColorMatrixRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1804                               const int8* matrix_argb, int width);
   1805 void ARGBColorMatrixRow_NEON(const uint8* src_argb, uint8* dst_argb,
   1806                              const int8* matrix_argb, int width);
   1807 
   // ARGBColorTableRow takes a mutable dst_argb and a read-only lookup table
   // (table_argb) but no source pointer, so it presumably remaps the row in
   // place -- confirm. The table size and layout are not visible here.
   1808 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
   1809 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
   1810 
   // RGBColorTableRow has the same signature as ARGBColorTableRow.
   // NOTE(review): how the two differ (e.g. whether the alpha channel is left
   // untouched) cannot be determined from this header -- see implementation.
   1811 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width);
   1812 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width);
   1813 
   // ARGBQuantizeRow takes only a mutable dst_argb (presumably in place --
   // confirm) plus three integer parameters: scale, interval_size and
   // interval_offset. Their exact numeric formats are not visible here.
   1814 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
   1815                        int interval_offset, int width);
   1816 void ARGBQuantizeRow_SSE2(uint8* dst_argb, int scale, int interval_size,
   1817                           int interval_offset, int width);
   1818 void ARGBQuantizeRow_NEON(uint8* dst_argb, int scale, int interval_size,
   1819                           int interval_offset, int width);
   1820 
   // ARGBShadeRow: reads src_argb and writes dst_argb. Note the parameter
   // order: the 32-bit `value` argument comes after width. `value` is
   // presumably a packed ARGB shade color -- confirm in the implementation.
   1821 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   1822                     uint32 value);
   1823 void ARGBShadeRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width,
   1824                        uint32 value);
   1825 void ARGBShadeRow_NEON(const uint8* src_argb, uint8* dst_argb, int width,
   1826                        uint32 value);
   1827 
   1828 // Used for blur.
   // SSE2 declarations. CumulativeSumToAverageRow reads two int32 rows
   // (topleft, botleft) and writes uint8 output to dst; it takes both a
   // `width` and a `count`, whose distinct roles are not visible here.
   // ComputeCumulativeSumRow reads a uint8 row and an int32 previous_cumsum
   // row and writes an int32 cumsum row.
   1829 void CumulativeSumToAverageRow_SSE2(const int32* topleft, const int32* botleft,
   1830                                     int width, int area, uint8* dst, int count);
   1831 void ComputeCumulativeSumRow_SSE2(const uint8* row, int32* cumsum,
   1832                                   const int32* previous_cumsum, int width);
   1833 
   // _C declarations of the cumulative-sum row functions; the parameter lists
   // are identical to the SSE2 declarations of the same names.
   1834 void CumulativeSumToAverageRow_C(const int32* topleft, const int32* botleft,
   1835                                  int width, int area, uint8* dst, int count);
   1836 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
   1837                                const int32* previous_cumsum, int width);
   1838 
   // ARGBAffineRow: reads from src_argb (with stride src_argb_stride) and
   // writes dst_argb, driven by the float array uv_dudv, whose length and
   // layout are not visible in this header. Unlike the neighboring row
   // functions, these declarations carry LIBYUV_API (presumably so they are
   // exported from the library -- confirm the macro's definition).
   1839 LIBYUV_API
   1840 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
   1841                      uint8* dst_argb, const float* uv_dudv, int width);
   1842 LIBYUV_API
   1843 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
   1844                         uint8* dst_argb, const float* uv_dudv, int width);
   1845 
   1846 // Used for I420Scale, ARGBScale, and ARGBInterpolate.
   // InterpolateRow: note that the destination pointer comes first here,
   // unlike most row functions in this header. src_stride_ptr is a ptrdiff_t
   // and source_y_fraction is an int; presumably the stride locates a second
   // source row that is blended with src_ptr by that fraction -- confirm the
   // fraction's range in the implementation. C, SSSE3, AVX2, NEON and DSPR2
   // variants are declared, with _Any_ forms for all but C.
   1847 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
   1848                       ptrdiff_t src_stride_ptr,
   1849                       int width, int source_y_fraction);
   1850 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   1851                           ptrdiff_t src_stride_ptr, int width,
   1852                           int source_y_fraction);
   1853 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
   1854                          ptrdiff_t src_stride_ptr, int width,
   1855                          int source_y_fraction);
   1856 void InterpolateRow_NEON(uint8* dst_ptr, const uint8* src_ptr,
   1857                          ptrdiff_t src_stride_ptr, int width,
   1858                          int source_y_fraction);
   1859 void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
   1860                           ptrdiff_t src_stride_ptr, int width,
   1861                           int source_y_fraction);
   1862 void InterpolateRow_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
   1863                              ptrdiff_t src_stride_ptr, int width,
   1864                              int source_y_fraction);
   1865 void InterpolateRow_Any_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
   1866                               ptrdiff_t src_stride_ptr, int width,
   1867                               int source_y_fraction);
   1868 void InterpolateRow_Any_AVX2(uint8* dst_ptr, const uint8* src_ptr,
   1869                              ptrdiff_t src_stride_ptr, int width,
   1870                              int source_y_fraction);
   1871 void InterpolateRow_Any_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
   1872                               ptrdiff_t src_stride_ptr, int width,
   1873                               int source_y_fraction);
   1874 
   // Variant of InterpolateRow operating on uint16 samples; the remaining
   // parameters match the 8-bit declarations. Only a _C version is declared
   // in this group.
   1875 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
   1876                          ptrdiff_t src_stride_ptr,
   1877                          int width, int source_y_fraction);
   1878 
   1879 // Sobel images.
   // Each operation is declared for C, SSE2 and NEON:
   //   SobelXRow       - three source rows (src_y0, src_y1, src_y2) -> dst_sobelx.
   //   SobelYRow       - two source rows (src_y0, src_y1) -> dst_sobely.
   //   SobelRow        - src_sobelx + src_sobely -> dst_argb.
   //   SobelToPlaneRow - src_sobelx + src_sobely -> dst_y.
   //   SobelXYRow      - src_sobelx + src_sobely -> dst_argb.
   // NOTE(review): how SobelRow and SobelXYRow differ in their ARGB output is
   // not visible in this header -- see the implementation.
   1880 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
   1881                  uint8* dst_sobelx, int width);
   1882 void SobelXRow_SSE2(const uint8* src_y0, const uint8* src_y1,
   1883                     const uint8* src_y2, uint8* dst_sobelx, int width);
   1884 void SobelXRow_NEON(const uint8* src_y0, const uint8* src_y1,
   1885                     const uint8* src_y2, uint8* dst_sobelx, int width);
   1886 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
   1887                  uint8* dst_sobely, int width);
   1888 void SobelYRow_SSE2(const uint8* src_y0, const uint8* src_y1,
   1889                     uint8* dst_sobely, int width);
   1890 void SobelYRow_NEON(const uint8* src_y0, const uint8* src_y1,
   1891                     uint8* dst_sobely, int width);
   1892 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1893                 uint8* dst_argb, int width);
   1894 void SobelRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1895                    uint8* dst_argb, int width);
   1896 void SobelRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1897                    uint8* dst_argb, int width);
   1898 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1899                        uint8* dst_y, int width);
   1900 void SobelToPlaneRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1901                           uint8* dst_y, int width);
   1902 void SobelToPlaneRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1903                           uint8* dst_y, int width);
   1904 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
   1905                   uint8* dst_argb, int width);
   1906 void SobelXYRow_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1907                      uint8* dst_argb, int width);
   1908 void SobelXYRow_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1909                      uint8* dst_argb, int width);
   // _Any_ counterparts (SSE2 and NEON) of SobelRow, SobelToPlaneRow and
   // SobelXYRow, with the same parameter lists as the non-_Any_ versions.
   // No _Any_ forms of SobelXRow or SobelYRow are declared in this group.
   1910 void SobelRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1911                        uint8* dst_argb, int width);
   1912 void SobelRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1913                        uint8* dst_argb, int width);
   1914 void SobelToPlaneRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1915                               uint8* dst_y, int width);
   1916 void SobelToPlaneRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1917                               uint8* dst_y, int width);
   1918 void SobelXYRow_Any_SSE2(const uint8* src_sobelx, const uint8* src_sobely,
   1919                          uint8* dst_argb, int width);
   1920 void SobelXYRow_Any_NEON(const uint8* src_sobelx, const uint8* src_sobely,
   1921                          uint8* dst_argb, int width);
   1922 
   // ARGBPolynomialRow: reads src_argb and writes dst_argb using float
   // coefficients supplied through `poly`. The number and ordering of the
   // coefficients are not visible in this header -- see the implementation.
   1923 void ARGBPolynomialRow_C(const uint8* src_argb,
   1924                          uint8* dst_argb, const float* poly,
   1925                          int width);
   1926 void ARGBPolynomialRow_SSE2(const uint8* src_argb,
   1927                             uint8* dst_argb, const float* poly,
   1928                             int width);
   1929 void ARGBPolynomialRow_AVX2(const uint8* src_argb,
   1930                             uint8* dst_argb, const float* poly,
   1931                             int width);
   1932 
   // ARGBLumaColorTableRow: reads src_argb and writes dst_argb using a byte
   // table (`luma`) and a 32-bit `lumacoeff`. Note that width precedes the
   // table arguments. `lumacoeff` presumably packs per-channel weights --
   // confirm in the implementation.
   1933 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   1934                              const uint8* luma, uint32 lumacoeff);
   1935 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
   1936                                  int width,
   1937                                  const uint8* luma, uint32 lumacoeff);
   1938 
   1939 #ifdef __cplusplus
   1940 }  // extern "C"
   1941 }  // namespace libyuv
   1942 #endif
   1943 
   1944 #endif  // INCLUDE_LIBYUV_ROW_H_  NOLINT
   1945