Home | History | Annotate | Download | only in libyuv
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #ifndef INCLUDE_LIBYUV_ROW_H_
     12 #define INCLUDE_LIBYUV_ROW_H_
     13 
     14 #include <stdlib.h>  // For malloc.
     15 
     16 #include "libyuv/basic_types.h"
     17 
     18 #ifdef __cplusplus
     19 namespace libyuv {
     20 extern "C" {
     21 #endif
     22 
     23 #if defined(__pnacl__) || defined(__CLR_VER) ||            \
     24     (defined(__native_client__) && defined(__x86_64__)) || \
     25     (defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
     26 #define LIBYUV_DISABLE_X86
     27 #endif
     28 #if defined(__native_client__)
     29 #define LIBYUV_DISABLE_NEON
     30 #endif
     31 // MemorySanitizer does not support assembly code yet. http://crbug.com/344505
     32 #if defined(__has_feature)
     33 #if __has_feature(memory_sanitizer)
     34 #define LIBYUV_DISABLE_X86
     35 #endif
     36 #endif
     37 // clang >= 3.5.0 required for Arm64.
     38 #if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON)
     39 #if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5))
     40 #define LIBYUV_DISABLE_NEON
     41 #endif  // clang >= 3.5
     42 #endif  // __clang__
     43 
     44 // GCC >= 4.7.0 required for AVX2.
     45 #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
     46 #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
     47 #define GCC_HAS_AVX2 1
     48 #endif  // GNUC >= 4.7
     49 #endif  // __GNUC__
     50 
     51 // clang >= 3.4.0 required for AVX2.
     52 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
     53 #if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
     54 #define CLANG_HAS_AVX2 1
     55 #endif  // clang >= 3.4
     56 #endif  // __clang__
     57 
// clang >= 7.0.0 required for AVX512.
     59 // TODO(fbarchard): fix xcode 9 ios b/789.
     60 #if 0  // Build fails in libvpx on Mac
     61 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
     62 #if (__clang_major__ >= 7) && !defined(__APPLE_EMBEDDED_SIMULATOR__)
     63 #define CLANG_HAS_AVX512 1
     64 #endif  // clang >= 7
     65 #endif  // __clang__
     66 #endif  // 0
     67 
     68 // Visual C 2012 required for AVX2.
     69 #if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
     70     _MSC_VER >= 1700
     71 #define VISUALC_HAS_AVX2 1
     72 #endif  // VisualStudio >= 2012
     73 
     74 // The following are available on all x86 platforms:
     75 #if !defined(LIBYUV_DISABLE_X86) && \
     76     (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
     77 // Conversions:
     78 #define HAS_ABGRTOUVROW_SSSE3
     79 #define HAS_ABGRTOYROW_SSSE3
     80 #define HAS_ARGB1555TOARGBROW_SSE2
     81 #define HAS_ARGB4444TOARGBROW_SSE2
     82 #define HAS_ARGBEXTRACTALPHAROW_SSE2
     83 #define HAS_ARGBSETROW_X86
     84 #define HAS_ARGBSHUFFLEROW_SSSE3
     85 #define HAS_ARGBTOARGB1555ROW_SSE2
     86 #define HAS_ARGBTOARGB4444ROW_SSE2
     87 #define HAS_ARGBTORAWROW_SSSE3
     88 #define HAS_ARGBTORGB24ROW_SSSE3
     89 #define HAS_ARGBTORGB565DITHERROW_SSE2
     90 #define HAS_ARGBTORGB565ROW_SSE2
     91 #define HAS_ARGBTOUV444ROW_SSSE3
     92 #define HAS_ARGBTOUVJROW_SSSE3
     93 #define HAS_ARGBTOUVROW_SSSE3
     94 #define HAS_ARGBTOYJROW_SSSE3
     95 #define HAS_ARGBTOYROW_SSSE3
     96 #define HAS_BGRATOUVROW_SSSE3
     97 #define HAS_BGRATOYROW_SSSE3
     98 #define HAS_COPYROW_ERMS
     99 #define HAS_COPYROW_SSE2
    100 #define HAS_H422TOARGBROW_SSSE3
    101 #define HAS_HALFFLOATROW_SSE2
    102 #define HAS_I400TOARGBROW_SSE2
    103 #define HAS_I422TOARGB1555ROW_SSSE3
    104 #define HAS_I422TOARGB4444ROW_SSSE3
    105 #define HAS_I422TOARGBROW_SSSE3
    106 #define HAS_I422TORGB24ROW_SSSE3
    107 #define HAS_I422TORGB565ROW_SSSE3
    108 #define HAS_I422TORGBAROW_SSSE3
    109 #define HAS_I422TOUYVYROW_SSE2
    110 #define HAS_I422TOYUY2ROW_SSE2
    111 #define HAS_I444TOARGBROW_SSSE3
    112 #define HAS_J400TOARGBROW_SSE2
    113 #define HAS_J422TOARGBROW_SSSE3
    114 #define HAS_MERGEUVROW_SSE2
    115 #define HAS_MIRRORROW_SSSE3
    116 #define HAS_MIRRORUVROW_SSSE3
    117 #define HAS_NV12TOARGBROW_SSSE3
    118 #define HAS_NV12TORGB24ROW_SSSE3
    119 #define HAS_NV12TORGB565ROW_SSSE3
    120 #define HAS_NV21TOARGBROW_SSSE3
    121 #define HAS_NV21TORGB24ROW_SSSE3
    122 #define HAS_RAWTOARGBROW_SSSE3
    123 #define HAS_RAWTORGB24ROW_SSSE3
    124 #define HAS_RAWTOYROW_SSSE3
    125 #define HAS_RGB24TOARGBROW_SSSE3
    126 #define HAS_RGB24TOYROW_SSSE3
    127 #define HAS_RGB565TOARGBROW_SSE2
    128 #define HAS_RGBATOUVROW_SSSE3
    129 #define HAS_RGBATOYROW_SSSE3
    130 #define HAS_SETROW_ERMS
    131 #define HAS_SETROW_X86
    132 #define HAS_SPLITUVROW_SSE2
    133 #define HAS_UYVYTOARGBROW_SSSE3
    134 #define HAS_UYVYTOUV422ROW_SSE2
    135 #define HAS_UYVYTOUVROW_SSE2
    136 #define HAS_UYVYTOYROW_SSE2
    137 #define HAS_YUY2TOARGBROW_SSSE3
    138 #define HAS_YUY2TOUV422ROW_SSE2
    139 #define HAS_YUY2TOUVROW_SSE2
    140 #define HAS_YUY2TOYROW_SSE2
    141 
    142 // Effects:
    143 #define HAS_ARGBADDROW_SSE2
    144 #define HAS_ARGBAFFINEROW_SSE2
    145 #define HAS_ARGBATTENUATEROW_SSSE3
    146 #define HAS_ARGBBLENDROW_SSSE3
    147 #define HAS_ARGBCOLORMATRIXROW_SSSE3
    148 #define HAS_ARGBCOLORTABLEROW_X86
    149 #define HAS_ARGBCOPYALPHAROW_SSE2
    150 #define HAS_ARGBCOPYYTOALPHAROW_SSE2
    151 #define HAS_ARGBGRAYROW_SSSE3
    152 #define HAS_ARGBLUMACOLORTABLEROW_SSSE3
    153 #define HAS_ARGBMIRRORROW_SSE2
    154 #define HAS_ARGBMULTIPLYROW_SSE2
    155 #define HAS_ARGBPOLYNOMIALROW_SSE2
    156 #define HAS_ARGBQUANTIZEROW_SSE2
    157 #define HAS_ARGBSEPIAROW_SSSE3
    158 #define HAS_ARGBSHADEROW_SSE2
    159 #define HAS_ARGBSUBTRACTROW_SSE2
    160 #define HAS_ARGBUNATTENUATEROW_SSE2
    161 #define HAS_BLENDPLANEROW_SSSE3
    162 #define HAS_COMPUTECUMULATIVESUMROW_SSE2
    163 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
    164 #define HAS_INTERPOLATEROW_SSSE3
    165 #define HAS_RGBCOLORTABLEROW_X86
    166 #define HAS_SOBELROW_SSE2
    167 #define HAS_SOBELTOPLANEROW_SSE2
    168 #define HAS_SOBELXROW_SSE2
    169 #define HAS_SOBELXYROW_SSE2
    170 #define HAS_SOBELYROW_SSE2
    171 
    172 // The following functions fail on gcc/clang 32 bit with fpic and framepointer.
    173 // caveat: clangcl uses row_win.cc which works.
    174 #if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
    175     defined(_MSC_VER)
    176 // TODO(fbarchard): fix build error on android_full_debug=1
    177 // https://code.google.com/p/libyuv/issues/detail?id=517
    178 #define HAS_I422ALPHATOARGBROW_SSSE3
    179 #endif
    180 #endif
    181 
    182 // The following are available on all x86 platforms, but
    183 // require VS2012, clang 3.4 or gcc 4.7.
    184 #if !defined(LIBYUV_DISABLE_X86) &&                          \
    185     (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \
    186      defined(GCC_HAS_AVX2))
    187 #define HAS_ARGBCOPYALPHAROW_AVX2
    188 #define HAS_ARGBCOPYYTOALPHAROW_AVX2
    189 #define HAS_ARGBEXTRACTALPHAROW_AVX2
    190 #define HAS_ARGBMIRRORROW_AVX2
    191 #define HAS_ARGBPOLYNOMIALROW_AVX2
    192 #define HAS_ARGBSHUFFLEROW_AVX2
    193 #define HAS_ARGBTORGB565DITHERROW_AVX2
    194 #define HAS_ARGBTOUVJROW_AVX2
    195 #define HAS_ARGBTOUVROW_AVX2
    196 #define HAS_ARGBTOYJROW_AVX2
    197 #define HAS_ARGBTOYROW_AVX2
    198 #define HAS_COPYROW_AVX
    199 #define HAS_H422TOARGBROW_AVX2
    200 #define HAS_HALFFLOATROW_AVX2
    201 //  #define HAS_HALFFLOATROW_F16C  // Enable to test halffloat cast
    202 #define HAS_I400TOARGBROW_AVX2
    203 #define HAS_I422TOARGB1555ROW_AVX2
    204 #define HAS_I422TOARGB4444ROW_AVX2
    205 #define HAS_I422TOARGBROW_AVX2
    206 #define HAS_I422TORGB24ROW_AVX2
    207 #define HAS_I422TORGB565ROW_AVX2
    208 #define HAS_I422TORGBAROW_AVX2
    209 #define HAS_I444TOARGBROW_AVX2
    210 #define HAS_INTERPOLATEROW_AVX2
    211 #define HAS_J422TOARGBROW_AVX2
    212 #define HAS_MERGEUVROW_AVX2
    213 #define HAS_MIRRORROW_AVX2
    214 #define HAS_NV12TOARGBROW_AVX2
    215 #define HAS_NV12TORGB24ROW_AVX2
    216 #define HAS_NV12TORGB565ROW_AVX2
    217 #define HAS_NV21TOARGBROW_AVX2
    218 #define HAS_NV21TORGB24ROW_AVX2
    219 #define HAS_SPLITUVROW_AVX2
    220 #define HAS_UYVYTOARGBROW_AVX2
    221 #define HAS_UYVYTOUV422ROW_AVX2
    222 #define HAS_UYVYTOUVROW_AVX2
    223 #define HAS_UYVYTOYROW_AVX2
    224 #define HAS_YUY2TOARGBROW_AVX2
    225 #define HAS_YUY2TOUV422ROW_AVX2
    226 #define HAS_YUY2TOUVROW_AVX2
    227 #define HAS_YUY2TOYROW_AVX2
    228 
    229 // Effects:
    230 #define HAS_ARGBADDROW_AVX2
    231 #define HAS_ARGBATTENUATEROW_AVX2
    232 #define HAS_ARGBMULTIPLYROW_AVX2
    233 #define HAS_ARGBSUBTRACTROW_AVX2
    234 #define HAS_ARGBUNATTENUATEROW_AVX2
    235 #define HAS_BLENDPLANEROW_AVX2
    236 
    237 #if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \
    238     defined(_MSC_VER)
    239 // TODO(fbarchard): fix build error on android_full_debug=1
    240 // https://code.google.com/p/libyuv/issues/detail?id=517
    241 #define HAS_I422ALPHATOARGBROW_AVX2
    242 #endif
    243 #endif
    244 
    245 // The following are available for AVX2 Visual C and clangcl 32 bit:
    246 // TODO(fbarchard): Port to gcc.
    247 #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
    248     (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
    249 #define HAS_ARGB1555TOARGBROW_AVX2
    250 #define HAS_ARGB4444TOARGBROW_AVX2
    251 #define HAS_ARGBTOARGB1555ROW_AVX2
    252 #define HAS_ARGBTOARGB4444ROW_AVX2
    253 #define HAS_ARGBTORGB565ROW_AVX2
    254 #define HAS_J400TOARGBROW_AVX2
    255 #define HAS_RGB565TOARGBROW_AVX2
    256 #endif
    257 
    258 // The following are also available on x64 Visual C.
    259 #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_X64) && \
    260     (!defined(__clang__) || defined(__SSSE3__))
    261 #define HAS_I422ALPHATOARGBROW_SSSE3
    262 #define HAS_I422TOARGBROW_SSSE3
    263 #endif
    264 
    265 // The following are available for gcc/clang x86 platforms:
    266 // TODO(fbarchard): Port to Visual C
    267 #if !defined(LIBYUV_DISABLE_X86) && \
    268     (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
    269 #define HAS_ABGRTOAR30ROW_SSSE3
    270 #define HAS_ARGBTOAR30ROW_SSSE3
    271 #define HAS_CONVERT16TO8ROW_SSSE3
    272 #define HAS_CONVERT8TO16ROW_SSE2
    273 // I210 is for H010.  2 = 422.  I for 601 vs H for 709.
    274 #define HAS_I210TOAR30ROW_SSSE3
    275 #define HAS_I210TOARGBROW_SSSE3
    276 #define HAS_I422TOAR30ROW_SSSE3
    277 #define HAS_MERGERGBROW_SSSE3
    278 #define HAS_SPLITRGBROW_SSSE3
    279 #endif
    280 
    281 // The following are available for AVX2 gcc/clang x86 platforms:
    282 // TODO(fbarchard): Port to Visual C
    283 #if !defined(LIBYUV_DISABLE_X86) &&                                       \
    284     (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
    285     (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
    286 #define HAS_ABGRTOAR30ROW_AVX2
    287 #define HAS_ARGBTOAR30ROW_AVX2
    288 #define HAS_ARGBTORAWROW_AVX2
    289 #define HAS_ARGBTORGB24ROW_AVX2
    290 #define HAS_CONVERT16TO8ROW_AVX2
    291 #define HAS_CONVERT8TO16ROW_AVX2
    292 #define HAS_I210TOAR30ROW_AVX2
    293 #define HAS_I210TOARGBROW_AVX2
    294 #define HAS_I422TOAR30ROW_AVX2
    295 #define HAS_I422TOUYVYROW_AVX2
    296 #define HAS_I422TOYUY2ROW_AVX2
    297 #define HAS_MERGEUVROW_16_AVX2
    298 #define HAS_MULTIPLYROW_16_AVX2
    299 #endif
    300 
    301 // The following are available for AVX512 clang x86 platforms:
    302 // TODO(fbarchard): Port to GCC and Visual C
    303 // TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789
    304 #if !defined(LIBYUV_DISABLE_X86) &&                                       \
    305     (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \
    306     (defined(CLANG_HAS_AVX512))
    307 #define HAS_ARGBTORGB24ROW_AVX512VBMI
    308 #endif
    309 
    310 // The following are available on Neon platforms:
    311 #if !defined(LIBYUV_DISABLE_NEON) && \
    312     (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON))
    313 #define HAS_ABGRTOUVROW_NEON
    314 #define HAS_ABGRTOYROW_NEON
    315 #define HAS_ARGB1555TOARGBROW_NEON
    316 #define HAS_ARGB1555TOUVROW_NEON
    317 #define HAS_ARGB1555TOYROW_NEON
    318 #define HAS_ARGB4444TOARGBROW_NEON
    319 #define HAS_ARGB4444TOUVROW_NEON
    320 #define HAS_ARGB4444TOYROW_NEON
    321 #define HAS_ARGBEXTRACTALPHAROW_NEON
    322 #define HAS_ARGBSETROW_NEON
    323 #define HAS_ARGBTOARGB1555ROW_NEON
    324 #define HAS_ARGBTOARGB4444ROW_NEON
    325 #define HAS_ARGBTORAWROW_NEON
    326 #define HAS_ARGBTORGB24ROW_NEON
    327 #define HAS_ARGBTORGB565DITHERROW_NEON
    328 #define HAS_ARGBTORGB565ROW_NEON
    329 #define HAS_ARGBTOUV444ROW_NEON
    330 #define HAS_ARGBTOUVJROW_NEON
    331 #define HAS_ARGBTOUVROW_NEON
    332 #define HAS_ARGBTOYJROW_NEON
    333 #define HAS_ARGBTOYROW_NEON
    334 #define HAS_BGRATOUVROW_NEON
    335 #define HAS_BGRATOYROW_NEON
    336 #define HAS_BYTETOFLOATROW_NEON
    337 #define HAS_COPYROW_NEON
    338 #define HAS_HALFFLOATROW_NEON
    339 #define HAS_I400TOARGBROW_NEON
    340 #define HAS_I422ALPHATOARGBROW_NEON
    341 #define HAS_I422TOARGB1555ROW_NEON
    342 #define HAS_I422TOARGB4444ROW_NEON
    343 #define HAS_I422TOARGBROW_NEON
    344 #define HAS_I422TORGB24ROW_NEON
    345 #define HAS_I422TORGB565ROW_NEON
    346 #define HAS_I422TORGBAROW_NEON
    347 #define HAS_I422TOUYVYROW_NEON
    348 #define HAS_I422TOYUY2ROW_NEON
    349 #define HAS_I444TOARGBROW_NEON
    350 #define HAS_J400TOARGBROW_NEON
    351 #define HAS_MERGEUVROW_NEON
    352 #define HAS_MIRRORROW_NEON
    353 #define HAS_MIRRORUVROW_NEON
    354 #define HAS_NV12TOARGBROW_NEON
    355 #define HAS_NV12TORGB24ROW_NEON
    356 #define HAS_NV12TORGB565ROW_NEON
    357 #define HAS_NV21TOARGBROW_NEON
    358 #define HAS_NV21TORGB24ROW_NEON
    359 #define HAS_RAWTOARGBROW_NEON
    360 #define HAS_RAWTORGB24ROW_NEON
    361 #define HAS_RAWTOUVROW_NEON
    362 #define HAS_RAWTOYROW_NEON
    363 #define HAS_RGB24TOARGBROW_NEON
    364 #define HAS_RGB24TOUVROW_NEON
    365 #define HAS_RGB24TOYROW_NEON
    366 #define HAS_RGB565TOARGBROW_NEON
    367 #define HAS_RGB565TOUVROW_NEON
    368 #define HAS_RGB565TOYROW_NEON
    369 #define HAS_RGBATOUVROW_NEON
    370 #define HAS_RGBATOYROW_NEON
    371 #define HAS_SETROW_NEON
    372 #define HAS_SPLITRGBROW_NEON
    373 #define HAS_SPLITUVROW_NEON
    374 #define HAS_UYVYTOARGBROW_NEON
    375 #define HAS_UYVYTOUV422ROW_NEON
    376 #define HAS_UYVYTOUVROW_NEON
    377 #define HAS_UYVYTOYROW_NEON
    378 #define HAS_YUY2TOARGBROW_NEON
    379 #define HAS_YUY2TOUV422ROW_NEON
    380 #define HAS_YUY2TOUVROW_NEON
    381 #define HAS_YUY2TOYROW_NEON
    382 
    383 // Effects:
    384 #define HAS_ARGBADDROW_NEON
    385 #define HAS_ARGBATTENUATEROW_NEON
    386 #define HAS_ARGBBLENDROW_NEON
    387 #define HAS_ARGBCOLORMATRIXROW_NEON
    388 #define HAS_ARGBGRAYROW_NEON
    389 #define HAS_ARGBMIRRORROW_NEON
    390 #define HAS_ARGBMULTIPLYROW_NEON
    391 #define HAS_ARGBQUANTIZEROW_NEON
    392 #define HAS_ARGBSEPIAROW_NEON
    393 #define HAS_ARGBSHADEROW_NEON
    394 #define HAS_ARGBSHUFFLEROW_NEON
    395 #define HAS_ARGBSUBTRACTROW_NEON
    396 #define HAS_INTERPOLATEROW_NEON
    397 #define HAS_SOBELROW_NEON
    398 #define HAS_SOBELTOPLANEROW_NEON
    399 #define HAS_SOBELXROW_NEON
    400 #define HAS_SOBELXYROW_NEON
    401 #define HAS_SOBELYROW_NEON
    402 #endif
    403 
    404 // The following are available on AArch64 platforms:
    405 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
    406 #define HAS_SCALESUMSAMPLES_NEON
    407 #endif
    408 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
    409 #define HAS_ABGRTOUVROW_MSA
    410 #define HAS_ABGRTOYROW_MSA
    411 #define HAS_ARGB1555TOARGBROW_MSA
    412 #define HAS_ARGB1555TOUVROW_MSA
    413 #define HAS_ARGB1555TOYROW_MSA
    414 #define HAS_ARGB4444TOARGBROW_MSA
    415 #define HAS_ARGBADDROW_MSA
    416 #define HAS_ARGBATTENUATEROW_MSA
    417 #define HAS_ARGBBLENDROW_MSA
    418 #define HAS_ARGBCOLORMATRIXROW_MSA
    419 #define HAS_ARGBEXTRACTALPHAROW_MSA
    420 #define HAS_ARGBGRAYROW_MSA
    421 #define HAS_ARGBMIRRORROW_MSA
    422 #define HAS_ARGBMULTIPLYROW_MSA
    423 #define HAS_ARGBQUANTIZEROW_MSA
    424 #define HAS_ARGBSEPIAROW_MSA
    425 #define HAS_ARGBSETROW_MSA
    426 #define HAS_ARGBSHADEROW_MSA
    427 #define HAS_ARGBSHUFFLEROW_MSA
    428 #define HAS_ARGBSUBTRACTROW_MSA
    429 #define HAS_ARGBTOARGB1555ROW_MSA
    430 #define HAS_ARGBTOARGB4444ROW_MSA
    431 #define HAS_ARGBTORAWROW_MSA
    432 #define HAS_ARGBTORGB24ROW_MSA
    433 #define HAS_ARGBTORGB565DITHERROW_MSA
    434 #define HAS_ARGBTORGB565ROW_MSA
    435 #define HAS_ARGBTOUV444ROW_MSA
    436 #define HAS_ARGBTOUVJROW_MSA
    437 #define HAS_ARGBTOUVROW_MSA
    438 #define HAS_ARGBTOYJROW_MSA
    439 #define HAS_ARGBTOYROW_MSA
    440 #define HAS_BGRATOUVROW_MSA
    441 #define HAS_BGRATOYROW_MSA
    442 #define HAS_HALFFLOATROW_MSA
    443 #define HAS_I400TOARGBROW_MSA
    444 #define HAS_I422ALPHATOARGBROW_MSA
    445 #define HAS_I422TOARGBROW_MSA
    446 #define HAS_I422TORGB24ROW_MSA
    447 #define HAS_I422TORGBAROW_MSA
    448 #define HAS_I422TOUYVYROW_MSA
    449 #define HAS_I422TOYUY2ROW_MSA
    450 #define HAS_I444TOARGBROW_MSA
    451 #define HAS_INTERPOLATEROW_MSA
    452 #define HAS_J400TOARGBROW_MSA
    453 #define HAS_MERGEUVROW_MSA
    454 #define HAS_MIRRORROW_MSA
    455 #define HAS_MIRRORUVROW_MSA
    456 #define HAS_NV12TOARGBROW_MSA
    457 #define HAS_NV12TORGB565ROW_MSA
    458 #define HAS_NV21TOARGBROW_MSA
    459 #define HAS_RAWTOARGBROW_MSA
    460 #define HAS_RAWTORGB24ROW_MSA
    461 #define HAS_RAWTOUVROW_MSA
    462 #define HAS_RAWTOYROW_MSA
    463 #define HAS_RGB24TOARGBROW_MSA
    464 #define HAS_RGB24TOUVROW_MSA
    465 #define HAS_RGB24TOYROW_MSA
    466 #define HAS_RGB565TOARGBROW_MSA
    467 #define HAS_RGB565TOUVROW_MSA
    468 #define HAS_RGB565TOYROW_MSA
    469 #define HAS_RGBATOUVROW_MSA
    470 #define HAS_RGBATOYROW_MSA
    471 #define HAS_SETROW_MSA
    472 #define HAS_SOBELROW_MSA
    473 #define HAS_SOBELTOPLANEROW_MSA
    474 #define HAS_SOBELXROW_MSA
    475 #define HAS_SOBELXYROW_MSA
    476 #define HAS_SOBELYROW_MSA
    477 #define HAS_SPLITUVROW_MSA
    478 #define HAS_UYVYTOARGBROW_MSA
    479 #define HAS_UYVYTOUVROW_MSA
    480 #define HAS_UYVYTOYROW_MSA
    481 #define HAS_YUY2TOARGBROW_MSA
    482 #define HAS_YUY2TOUV422ROW_MSA
    483 #define HAS_YUY2TOUVROW_MSA
    484 #define HAS_YUY2TOYROW_MSA
    485 #endif
    486 
// SIMD_ALIGNED(var) decorates a variable declaration with an alignment
// request, and the vecN / uvecN / lvecN / ulvecN typedefs name 16-byte
// (vec, uvec) and 32-byte (lvec, ulvec) groups of signed / unsigned lanes.
// Three compiler families are handled:
//   1. Visual C (not clang-cl, not managed code): __declspec(align(N)).
//   2. GCC / clang (except PNaCl): __attribute__ aligned / vector_size.
//   3. Anything else: plain arrays with no alignment request.
#if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__)
// Visual C: request 32-byte alignment only when the AVX2 code paths are
// enabled, otherwise 16 bytes.
#if defined(VISUALC_HAS_AVX2)
#define SIMD_ALIGNED(var) __declspec(align(32)) var
#else
#define SIMD_ALIGNED(var) __declspec(align(16)) var
#endif
// 16-byte vectors, declared as aligned arrays.
typedef __declspec(align(16)) int16_t vec16[8];
typedef __declspec(align(16)) int32_t vec32[4];
typedef __declspec(align(16)) int8_t vec8[16];
typedef __declspec(align(16)) uint16_t uvec16[8];
typedef __declspec(align(16)) uint32_t uvec32[4];
typedef __declspec(align(16)) uint8_t uvec8[16];
// 32-byte vectors, declared as aligned arrays.
typedef __declspec(align(32)) int16_t lvec16[16];
typedef __declspec(align(32)) int32_t lvec32[8];
typedef __declspec(align(32)) int8_t lvec8[32];
typedef __declspec(align(32)) uint16_t ulvec16[16];
typedef __declspec(align(32)) uint32_t ulvec32[8];
typedef __declspec(align(32)) uint8_t ulvec8[32];
#elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__))
// Caveat GCC 4.2 to 4.7 have a known issue using vectors with const.
// GCC / clang: the attribute follows the declarator, so SIMD_ALIGNED appends
// it after |var|. 32-byte alignment is requested when AVX2 is available.
#if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)
#define SIMD_ALIGNED(var) var __attribute__((aligned(32)))
#else
#define SIMD_ALIGNED(var) var __attribute__((aligned(16)))
#endif
// 16-byte vectors, declared with the vector_size extension (these are vector
// types here, not arrays as in the other two branches).
typedef int16_t __attribute__((vector_size(16))) vec16;
typedef int32_t __attribute__((vector_size(16))) vec32;
typedef int8_t __attribute__((vector_size(16))) vec8;
typedef uint16_t __attribute__((vector_size(16))) uvec16;
typedef uint32_t __attribute__((vector_size(16))) uvec32;
typedef uint8_t __attribute__((vector_size(16))) uvec8;
// 32-byte vectors.
typedef int16_t __attribute__((vector_size(32))) lvec16;
typedef int32_t __attribute__((vector_size(32))) lvec32;
typedef int8_t __attribute__((vector_size(32))) lvec8;
typedef uint16_t __attribute__((vector_size(32))) ulvec16;
typedef uint32_t __attribute__((vector_size(32))) ulvec32;
typedef uint8_t __attribute__((vector_size(32))) ulvec8;
#else
// Fallback: SIMD_ALIGNED is a no-op and the vector types are ordinary arrays
// of the same total size, with only their natural alignment.
#define SIMD_ALIGNED(var) var
typedef int16_t vec16[8];
typedef int32_t vec32[4];
typedef int8_t vec8[16];
typedef uint16_t uvec16[8];
typedef uint32_t uvec32[4];
typedef uint8_t uvec8[16];
typedef int16_t lvec16[16];
typedef int32_t lvec32[8];
typedef int8_t lvec8[32];
typedef uint16_t ulvec16[16];
typedef uint32_t ulvec32[8];
typedef uint8_t ulvec8[32];
#endif
    539 
// struct YuvConstants holds the coefficient tables for one YUV <-> RGB
// conversion matrix. Its layout differs per architecture, so exactly one of
// the three definitions below is compiled.
#if defined(__aarch64__)
// This struct is for Arm64 color conversion.
struct YuvConstants {
  uvec16 kUVToRB;
  uvec16 kUVToRB2;
  uvec16 kUVToG;
  uvec16 kUVToG2;
  vec16 kUVBiasBGR;
  vec32 kYToRgb;
};
#elif defined(__arm__)
// This struct is for ArmV7 color conversion.
struct YuvConstants {
  uvec8 kUVToRB;
  uvec8 kUVToG;
  vec16 kUVBiasBGR;
  vec32 kYToRgb;
};
#else
// This struct is for Intel color conversion.
// Every member occupies 32 bytes (32 x int8_t or 16 x int16_t), which is what
// the byte offsets defined below rely on.
struct YuvConstants {
  int8_t kUVToB[32];
  int8_t kUVToG[32];
  int8_t kUVToR[32];
  int16_t kUVBiasB[16];
  int16_t kUVBiasG[16];
  int16_t kUVBiasR[16];
  int16_t kYToRgb[16];
};

// Offsets into YuvConstants structure
// Byte offsets of the members above, in declaration order (32 bytes apart).
// They are maintained by hand: keep them in sync with the struct if a member
// is added, removed, resized or reordered.
#define KUVTOB 0
#define KUVTOG 32
#define KUVTOR 64
#define KUVBIASB 96
#define KUVBIASG 128
#define KUVBIASR 160
#define KYTORGB 192
#endif
    579 
    580 // Conversion matrix for YUV to RGB
    581 extern const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants);  // BT.601
    582 extern const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants);  // JPeg
    583 extern const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants);  // BT.709
    584 
    585 // Conversion matrix for YVU to BGR
    586 extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants);  // BT.601
    587 extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants);  // JPeg
    588 extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants);  // BT.709
    589 
    590 #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1)))
    591 
    592 #define align_buffer_64(var, size)                                           \
    593   uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63));         /* NOLINT */ \
    594   uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */
    595 
    596 #define free_aligned_buffer_64(var) \
    597   free(var##_mem);                  \
    598   var = 0
    599 
    600 #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__)
    601 #define OMITFP
    602 #else
    603 #define OMITFP __attribute__((optimize("omit-frame-pointer")))
    604 #endif
    605 
    606 // NaCL macros for GCC x86 and x64.
    607 #if defined(__native_client__)
    608 #define LABELALIGN ".p2align 5\n"
    609 #else
    610 #define LABELALIGN
    611 #endif
    612 
// Intel Architecture Code Analyzer (IACA) markers.  Insert IACA_START and
// IACA_END around code to be measured and then run with
// iaca -64 libyuv_unittest.
// IACA_ASM_START and IACA_ASM_END are equivalents that can be used within
// inline assembly blocks.
    617 // example of iaca:
    618 // ~/iaca-lin64/bin/iaca.sh -64 -analysis LATENCY out/Release/libyuv_unittest
    619 
// Two flavours of the marker macros are provided: GCC-style inline assembly
// when an x86 target macro (__x86_64__ / __i386__) is defined, and Visual C
// __asm syntax otherwise.
// NOTE(review): the #else branch is also selected for any non-x86 target;
// the macros are harmless there only as long as they are never expanded.
#if defined(__x86_64__) || defined(__i386__)

// String fragments to paste inside an existing extended-asm template (hence
// the doubled %%). START emits bytes 0x0F 0x0B, loads 111 into ebx and emits
// 0x64 0x67 0x90; END loads 222 and emits the same bytes in mirrored order.
#define IACA_ASM_START  \
  ".byte 0x0F, 0x0B\n"  \
  " movl $111, %%ebx\n" \
  ".byte 0x64, 0x67, 0x90\n"

#define IACA_ASM_END         \
  " movl $222, %%ebx\n"      \
  ".byte 0x64, 0x67, 0x90\n" \
  ".byte 0x0F, 0x0B\n"

// Stand-alone statement form: loads MARK_ID into ebx followed by the marker
// bytes 0x64 0x67 0x90.
// NOTE(review): ebx is written but only "memory" is listed as clobbered -
// confirm this is acceptable wherever the marker is placed.
#define IACA_SSC_MARK(MARK_ID)                        \
  __asm__ __volatile__("\n\t  movl $" #MARK_ID        \
                       ", %%ebx"                      \
                       "\n\t  .byte 0x64, 0x67, 0x90" \
                       :                              \
                       :                              \
                       : "memory");

// Emits the two bytes 0x0F 0x0B as a stand-alone statement.
#define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B");

#else /* Visual C */
// Same byte sequences as above, written with __asm / _emit.
#define IACA_UD_BYTES \
  { __asm _emit 0x0F __asm _emit 0x0B }

#define IACA_SSC_MARK(x) \
  { __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 }

// Markers built on the __writegsbyte intrinsic instead of inline assembly,
// using the same 111 / 222 start / end ids.
#define IACA_VC64_START __writegsbyte(111, 111);
#define IACA_VC64_END __writegsbyte(222, 222);
#endif

// Statement-level markers: place IACA_START before and IACA_END after the
// region to analyse. 111 is the start id and 222 the end id; the byte marker
// and the id marker are emitted in opposite order at the two ends.
#define IACA_START     \
  {                    \
    IACA_UD_BYTES      \
    IACA_SSC_MARK(111) \
  }
#define IACA_END       \
  {                    \
    IACA_SSC_MARK(222) \
    IACA_UD_BYTES      \
  }
    663 
    664 void I444ToARGBRow_NEON(const uint8_t* src_y,
    665                         const uint8_t* src_u,
    666                         const uint8_t* src_v,
    667                         uint8_t* dst_argb,
    668                         const struct YuvConstants* yuvconstants,
    669                         int width);
    670 void I422ToARGBRow_NEON(const uint8_t* src_y,
    671                         const uint8_t* src_u,
    672                         const uint8_t* src_v,
    673                         uint8_t* dst_argb,
    674                         const struct YuvConstants* yuvconstants,
    675                         int width);
    676 void I422AlphaToARGBRow_NEON(const uint8_t* src_y,
    677                              const uint8_t* src_u,
    678                              const uint8_t* src_v,
    679                              const uint8_t* src_a,
    680                              uint8_t* dst_argb,
    681                              const struct YuvConstants* yuvconstants,
    682                              int width);
    683 void I422ToARGBRow_NEON(const uint8_t* src_y,
    684                         const uint8_t* src_u,
    685                         const uint8_t* src_v,
    686                         uint8_t* dst_argb,
    687                         const struct YuvConstants* yuvconstants,
    688                         int width);
    689 void I422ToRGBARow_NEON(const uint8_t* src_y,
    690                         const uint8_t* src_u,
    691                         const uint8_t* src_v,
    692                         uint8_t* dst_rgba,
    693                         const struct YuvConstants* yuvconstants,
    694                         int width);
    695 void I422ToRGB24Row_NEON(const uint8_t* src_y,
    696                          const uint8_t* src_u,
    697                          const uint8_t* src_v,
    698                          uint8_t* dst_rgb24,
    699                          const struct YuvConstants* yuvconstants,
    700                          int width);
    701 void I422ToRGB565Row_NEON(const uint8_t* src_y,
    702                           const uint8_t* src_u,
    703                           const uint8_t* src_v,
    704                           uint8_t* dst_rgb565,
    705                           const struct YuvConstants* yuvconstants,
    706                           int width);
    707 void I422ToARGB1555Row_NEON(const uint8_t* src_y,
    708                             const uint8_t* src_u,
    709                             const uint8_t* src_v,
    710                             uint8_t* dst_argb1555,
    711                             const struct YuvConstants* yuvconstants,
    712                             int width);
    713 void I422ToARGB4444Row_NEON(const uint8_t* src_y,
    714                             const uint8_t* src_u,
    715                             const uint8_t* src_v,
    716                             uint8_t* dst_argb4444,
    717                             const struct YuvConstants* yuvconstants,
    718                             int width);
    719 void NV12ToARGBRow_NEON(const uint8_t* src_y,
    720                         const uint8_t* src_uv,
    721                         uint8_t* dst_argb,
    722                         const struct YuvConstants* yuvconstants,
    723                         int width);
    724 void NV12ToRGB565Row_NEON(const uint8_t* src_y,
    725                           const uint8_t* src_uv,
    726                           uint8_t* dst_rgb565,
    727                           const struct YuvConstants* yuvconstants,
    728                           int width);
    729 void NV21ToARGBRow_NEON(const uint8_t* src_y,
    730                         const uint8_t* src_vu,
    731                         uint8_t* dst_argb,
    732                         const struct YuvConstants* yuvconstants,
    733                         int width);
    734 void NV12ToRGB24Row_NEON(const uint8_t* src_y,
    735                          const uint8_t* src_uv,
    736                          uint8_t* dst_rgb24,
    737                          const struct YuvConstants* yuvconstants,
    738                          int width);
    739 void NV21ToRGB24Row_NEON(const uint8_t* src_y,
    740                          const uint8_t* src_vu,
    741                          uint8_t* dst_rgb24,
    742                          const struct YuvConstants* yuvconstants,
    743                          int width);
    744 void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2,
    745                         uint8_t* dst_argb,
    746                         const struct YuvConstants* yuvconstants,
    747                         int width);
    748 void UYVYToARGBRow_NEON(const uint8_t* src_uyvy,
    749                         uint8_t* dst_argb,
    750                         const struct YuvConstants* yuvconstants,
    751                         int width);
    752 void I444ToARGBRow_MSA(const uint8_t* src_y,
    753                        const uint8_t* src_u,
    754                        const uint8_t* src_v,
    755                        uint8_t* dst_argb,
    756                        const struct YuvConstants* yuvconstants,
    757                        int width);
    758 
    759 void I422ToARGBRow_MSA(const uint8_t* src_y,  // Prototype only; the body is defined outside this header section. Name suggests an MSA I422 -> ARGB row routine (TODO confirm).
    760                        const uint8_t* src_u,  // read-only source pointer, separate from src_y
    761                        const uint8_t* src_v,  // read-only source pointer, separate from src_y
    762                        uint8_t* dst_argb,  // writable destination pointer
    763                        const struct YuvConstants* yuvconstants,  // read-only constants; struct layout is not visible in this section
    764                        int width);  // presumably the pixel count of the row - TODO confirm
    765 void I422ToRGBARow_MSA(const uint8_t* src_y,  // Prototype only; name suggests MSA I422 -> RGBA (TODO confirm).
    766                        const uint8_t* src_u,
    767                        const uint8_t* src_v,
    768                        uint8_t* dst_argb,  // NOTE(review): named dst_argb although the function name says RGBA - confirm the parameter name is just stale
    769                        const struct YuvConstants* yuvconstants,
    770                        int width);
    771 void I422AlphaToARGBRow_MSA(const uint8_t* src_y,  // Prototype only; name suggests MSA I422 plus alpha -> ARGB (TODO confirm).
    772                             const uint8_t* src_u,
    773                             const uint8_t* src_v,
    774                             const uint8_t* src_a,  // fourth read-only source; presumably the alpha data - TODO confirm
    775                             uint8_t* dst_argb,
    776                             const struct YuvConstants* yuvconstants,
    777                             int width);
    778 void I422ToRGB24Row_MSA(const uint8_t* src_y,  // Prototype only; name suggests MSA I422 -> RGB24 (TODO confirm).
    779                         const uint8_t* src_u,
    780                         const uint8_t* src_v,
    781                         uint8_t* dst_argb,  // NOTE(review): named dst_argb although the function name says RGB24 - confirm the parameter name is just stale
    782                         const struct YuvConstants* yuvconstants,
    783                         int width);
    784 void I422ToRGB565Row_MSA(const uint8_t* src_y,  // Prototype only; name suggests MSA I422 -> RGB565 (TODO confirm).
    785                          const uint8_t* src_u,
    786                          const uint8_t* src_v,
    787                          uint8_t* dst_rgb565,
    788                          const struct YuvConstants* yuvconstants,
    789                          int width);
    790 void I422ToARGB4444Row_MSA(const uint8_t* src_y,  // Prototype only; name suggests MSA I422 -> ARGB4444 (TODO confirm).
    791                            const uint8_t* src_u,
    792                            const uint8_t* src_v,
    793                            uint8_t* dst_argb4444,
    794                            const struct YuvConstants* yuvconstants,
    795                            int width);
    796 void I422ToARGB1555Row_MSA(const uint8_t* src_y,  // Prototype only; name suggests MSA I422 -> ARGB1555 (TODO confirm).
    797                            const uint8_t* src_u,
    798                            const uint8_t* src_v,
    799                            uint8_t* dst_argb1555,
    800                            const struct YuvConstants* yuvconstants,
    801                            int width);
    802 void NV12ToARGBRow_MSA(const uint8_t* src_y,  // Prototype only; the body is defined outside this header section. Name suggests an MSA NV12 -> ARGB row routine (TODO confirm).
    803                        const uint8_t* src_uv,  // second read-only source pointer
    804                        uint8_t* dst_argb,  // writable destination pointer
    805                        const struct YuvConstants* yuvconstants,  // read-only constants; struct layout is not visible in this section
    806                        int width);  // presumably the pixel count of the row - TODO confirm
    807 void NV12ToRGB565Row_MSA(const uint8_t* src_y,  // Prototype only; name suggests MSA NV12 -> RGB565 (TODO confirm).
    808                          const uint8_t* src_uv,
    809                          uint8_t* dst_rgb565,
    810                          const struct YuvConstants* yuvconstants,
    811                          int width);
    812 void NV21ToARGBRow_MSA(const uint8_t* src_y,  // Prototype only; name suggests MSA NV21 -> ARGB (TODO confirm).
    813                        const uint8_t* src_vu,  // spelled src_vu here, src_uv in the NV12 prototypes
    814                        uint8_t* dst_argb,
    815                        const struct YuvConstants* yuvconstants,
    816                        int width);
    817 void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2,  // Prototype only; takes a single source pointer. Name suggests MSA YUY2 -> ARGB (TODO confirm).
    818                        uint8_t* dst_argb,
    819                        const struct YuvConstants* yuvconstants,
    820                        int width);
    821 void UYVYToARGBRow_MSA(const uint8_t* src_uyvy,  // Prototype only; takes a single source pointer. Name suggests MSA UYVY -> ARGB (TODO confirm).
    822                        uint8_t* dst_argb,
    823                        const struct YuvConstants* yuvconstants,
    824                        int width);
    825 
    826 void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);  // Prototype only (body defined elsewhere): read-only source, writable destination, int width. Name suggests AVX2 ARGB -> Y (TODO confirm).
    827 void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // `_Any_` variant with generic parameter names; presumably the any-width wrapper - TODO confirm
    828 void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);  // same signature, SSSE3-suffixed
    829 void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width);  // "YJ" presumably selects a different (JPEG-style) luma variant - TODO confirm
    830 void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // `_Any_` counterpart of ARGBToYJRow_AVX2
    831 void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width);  // same signature, SSSE3-suffixed
    832 void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width);  // name suggests BGRA -> Y (TODO confirm)
    833 void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width);  // name suggests ABGR -> Y (TODO confirm)
    834 void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width);  // name suggests RGBA -> Y (TODO confirm)
    835 void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);  // name suggests RGB24 -> Y (TODO confirm)
    836 void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);  // name suggests RAW -> Y (TODO confirm)
    837 void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);  // NEON-suffixed counterpart of ARGBToYRow_AVX2
    838 void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width);  // NEON-suffixed counterpart of ARGBToYJRow_AVX2
    839 void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // MSA-suffixed; source parameter is spelled src_argb0 here
    840 void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // MSA-suffixed; source parameter is spelled src_argb0 here
    841 void ARGBToUV444Row_NEON(const uint8_t* src_argb,  // Prototype only; the body is defined outside this header section. Name suggests NEON ARGB -> separate U and V rows (TODO confirm).
    842                          uint8_t* dst_u,  // first writable destination
    843                          uint8_t* dst_v,  // second writable destination
    844                          int width);  // presumably the pixel count of the row - TODO confirm
    845 void ARGBToUVRow_NEON(const uint8_t* src_argb,  // Prototype only; unlike the UV444 form this one also takes a stride.
    846                       int src_stride_argb,  // presumably the byte offset to a second source row - TODO confirm
    847                       uint8_t* dst_u,
    848                       uint8_t* dst_v,
    849                       int width);
    850 void ARGBToUV444Row_MSA(const uint8_t* src_argb,  // Prototype only; MSA-suffixed counterpart of ARGBToUV444Row_NEON.
    851                         uint8_t* dst_u,
    852                         uint8_t* dst_v,
    853                         int width);
    854 void ARGBToUVRow_MSA(const uint8_t* src_argb0,  // Prototype only; MSA-suffixed counterpart of ARGBToUVRow_NEON (source spelled src_argb0).
    855                      int src_stride_argb,
    856                      uint8_t* dst_u,
    857                      uint8_t* dst_v,
    858                      int width);
    859 void ARGBToUVJRow_NEON(const uint8_t* src_argb,  // Prototype only; the body is defined outside this header section. "UVJ" presumably marks a JPEG-style chroma variant - TODO confirm.
    860                        int src_stride_argb,  // presumably the byte offset to a second source row - TODO confirm
    861                        uint8_t* dst_u,  // first writable destination
    862                        uint8_t* dst_v,  // second writable destination
    863                        int width);  // presumably the pixel count of the row - TODO confirm
    864 void BGRAToUVRow_NEON(const uint8_t* src_bgra,  // Prototype only; name suggests NEON BGRA -> U,V (TODO confirm).
    865                       int src_stride_bgra,
    866                       uint8_t* dst_u,
    867                       uint8_t* dst_v,
    868                       int width);
    869 void ABGRToUVRow_NEON(const uint8_t* src_abgr,  // Prototype only; name suggests NEON ABGR -> U,V (TODO confirm).
    870                       int src_stride_abgr,
    871                       uint8_t* dst_u,
    872                       uint8_t* dst_v,
    873                       int width);
    874 void RGBAToUVRow_NEON(const uint8_t* src_rgba,  // Prototype only; name suggests NEON RGBA -> U,V (TODO confirm).
    875                       int src_stride_rgba,
    876                       uint8_t* dst_u,
    877                       uint8_t* dst_v,
    878                       int width);
    879 void RGB24ToUVRow_NEON(const uint8_t* src_rgb24,  // Prototype only; name suggests NEON RGB24 -> U,V (TODO confirm).
    880                        int src_stride_rgb24,
    881                        uint8_t* dst_u,
    882                        uint8_t* dst_v,
    883                        int width);
    884 void RAWToUVRow_NEON(const uint8_t* src_raw,  // Prototype only; name suggests NEON RAW -> U,V (TODO confirm).
    885                      int src_stride_raw,
    886                      uint8_t* dst_u,
    887                      uint8_t* dst_v,
    888                      int width);
    889 void RGB565ToUVRow_NEON(const uint8_t* src_rgb565,  // Prototype only; name suggests NEON RGB565 -> U,V (TODO confirm).
    890                         int src_stride_rgb565,
    891                         uint8_t* dst_u,
    892                         uint8_t* dst_v,
    893                         int width);
    894 void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555,  // Prototype only; name suggests NEON ARGB1555 -> U,V (TODO confirm).
    895                           int src_stride_argb1555,
    896                           uint8_t* dst_u,
    897                           uint8_t* dst_v,
    898                           int width);
    899 void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444,  // Prototype only; name suggests NEON ARGB4444 -> U,V (TODO confirm).
    900                           int src_stride_argb4444,
    901                           uint8_t* dst_u,
    902                           uint8_t* dst_v,
    903                           int width);
    904 void ARGBToUVJRow_MSA(const uint8_t* src_rgb0,  // Prototype only; the body is defined outside this header section. MSA-suffixed; source is named generically (src_rgb0), so the format is identified only by the function name.
    905                       int src_stride_rgb,  // presumably the byte offset to a second source row - TODO confirm
    906                       uint8_t* dst_u,  // first writable destination
    907                       uint8_t* dst_v,  // second writable destination
    908                       int width);  // presumably the pixel count of the row - TODO confirm
    909 void BGRAToUVRow_MSA(const uint8_t* src_rgb0,  // Prototype only; name suggests MSA BGRA -> U,V (TODO confirm).
    910                      int src_stride_rgb,
    911                      uint8_t* dst_u,
    912                      uint8_t* dst_v,
    913                      int width);
    914 void ABGRToUVRow_MSA(const uint8_t* src_rgb0,  // Prototype only; name suggests MSA ABGR -> U,V (TODO confirm).
    915                      int src_stride_rgb,
    916                      uint8_t* dst_u,
    917                      uint8_t* dst_v,
    918                      int width);
    919 void RGBAToUVRow_MSA(const uint8_t* src_rgb0,  // Prototype only; name suggests MSA RGBA -> U,V (TODO confirm).
    920                      int src_stride_rgb,
    921                      uint8_t* dst_u,
    922                      uint8_t* dst_v,
    923                      int width);
    924 void RGB24ToUVRow_MSA(const uint8_t* src_rgb0,  // Prototype only; name suggests MSA RGB24 -> U,V (TODO confirm).
    925                       int src_stride_rgb,
    926                       uint8_t* dst_u,
    927                       uint8_t* dst_v,
    928                       int width);
    929 void RAWToUVRow_MSA(const uint8_t* src_rgb0,  // Prototype only; name suggests MSA RAW -> U,V (TODO confirm).
    930                     int src_stride_rgb,
    931                     uint8_t* dst_u,
    932                     uint8_t* dst_v,
    933                     int width);
    934 void RGB565ToUVRow_MSA(const uint8_t* src_rgb565,  // Prototype only; name suggests MSA RGB565 -> U,V (TODO confirm). Format-specific parameter names here.
    935                        int src_stride_rgb565,
    936                        uint8_t* dst_u,
    937                        uint8_t* dst_v,
    938                        int width);
    939 void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555,  // Prototype only; name suggests MSA ARGB1555 -> U,V (TODO confirm). No ARGB4444 MSA prototype appears in this run.
    940                          int src_stride_argb1555,
    941                          uint8_t* dst_u,
    942                          uint8_t* dst_v,
    943                          int width);
    944 void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width);  // Prototype only (body defined elsewhere): read-only source, writable destination, int width. Name suggests NEON BGRA -> Y (TODO confirm).
    945 void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width);  // name suggests NEON ABGR -> Y (TODO confirm)
    946 void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width);  // name suggests NEON RGBA -> Y (TODO confirm)
    947 void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width);  // name suggests NEON RGB24 -> Y (TODO confirm)
    948 void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width);  // name suggests NEON RAW -> Y (TODO confirm)
    949 void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width);  // name suggests NEON RGB565 -> Y (TODO confirm)
    950 void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555,  // name suggests NEON ARGB1555 -> Y (TODO confirm)
    951                          uint8_t* dst_y,
    952                          int width);
    953 void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444,  // name suggests NEON ARGB4444 -> Y (TODO confirm)
    954                          uint8_t* dst_y,
    955                          int width);
    956 void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // MSA-suffixed. NOTE(review): source is named src_argb0 although the function name says BGRA - confirm it is only a naming leftover
    957 void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // MSA-suffixed; same src_argb0 naming
    958 void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // MSA-suffixed; same src_argb0 naming
    959 void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // MSA-suffixed; same src_argb0 naming
    960 void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // MSA-suffixed; same src_argb0 naming
    961 void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width);  // MSA-suffixed; format-specific parameter name
    962 void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width);  // MSA-suffixed; no ARGB4444 MSA prototype appears in this run
    963 void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // Prototype only (body defined elsewhere). `_C` suffix presumably marks the portable C version - TODO confirm. Read-only source, writable destination, int width.
    964 void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // "YJ" presumably selects a different (JPEG-style) luma variant - TODO confirm
    965 void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // NOTE(review): source is named src_argb0 although the function name says BGRA
    966 void ABGRToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // same src_argb0 naming
    967 void RGBAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // same src_argb0 naming
    968 void RGB24ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // same src_argb0 naming
    969 void RAWToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width);  // same src_argb0 naming
    970 void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width);  // format-specific parameter name
    971 void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width);  // format-specific parameter name
    972 void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width);  // format-specific parameter name
    973 void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // Prototype only (body defined elsewhere). `_Any_` presumably marks the any-width wrapper around the SSSE3 routine - TODO confirm. Generic parameter names; format comes from the function name.
    974 void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // `_Any_` form of ARGBToYJRow_SSSE3
    975 void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // `_Any_` form of BGRAToYRow_SSSE3
    976 void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // `_Any_` form of ABGRToYRow_SSSE3
    977 void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // `_Any_` form of RGBAToYRow_SSSE3
    978 void RGB24ToYRow_Any_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width);  // NOTE(review): uses src_rgb24/dst_y instead of the src_ptr/dst_ptr naming of its siblings
    979 void RAWToYRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width);  // NOTE(review): uses src_raw/dst_y instead of the src_ptr/dst_ptr naming of its siblings
    980 void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // NEON `_Any_` prototypes start here
    981 void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    982 void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    983 void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    984 void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    985 void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    986 void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    987 void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    988 void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr,  // same three-parameter signature, wrapped across lines
    989                              uint8_t* dst_ptr,
    990                              int width);
    991 void ARGB4444ToYRow_Any_NEON(const uint8_t* src_ptr,  // same three-parameter signature, wrapped across lines
    992                              uint8_t* dst_ptr,
    993                              int width);
    994 void BGRAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // MSA `_Any_` prototypes start here
    995 void ABGRToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    996 void RGBAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    997 void ARGBToYJRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    998 void ARGBToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
    999 void RGB24ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1000 void RAWToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1001 void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1002 void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr,  // same three-parameter signature, wrapped across lines
   1003                             uint8_t* dst_ptr,
   1004                             int width);
   1005 
   1006 void ARGBToUVRow_AVX2(const uint8_t* src_argb0,  // Prototype only; the body is defined outside this header section. Name suggests AVX2 ARGB -> U,V (TODO confirm).
   1007                       int src_stride_argb,  // presumably the byte offset to a second source row - TODO confirm
   1008                       uint8_t* dst_u,  // first writable destination
   1009                       uint8_t* dst_v,  // second writable destination
   1010                       int width);  // presumably the pixel count of the row - TODO confirm
   1011 void ARGBToUVJRow_AVX2(const uint8_t* src_argb0,  // Prototype only; "UVJ" presumably marks a JPEG-style chroma variant - TODO confirm.
   1012                        int src_stride_argb,
   1013                        uint8_t* dst_u,
   1014                        uint8_t* dst_v,
   1015                        int width);
   1016 void ARGBToUVRow_SSSE3(const uint8_t* src_argb0,  // Prototype only; SSSE3-suffixed counterpart of ARGBToUVRow_AVX2.
   1017                        int src_stride_argb,
   1018                        uint8_t* dst_u,
   1019                        uint8_t* dst_v,
   1020                        int width);
   1021 void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0,  // Prototype only; SSSE3-suffixed counterpart of ARGBToUVJRow_AVX2.
   1022                         int src_stride_argb,
   1023                         uint8_t* dst_u,
   1024                         uint8_t* dst_v,
   1025                         int width);
   1026 void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0,  // Prototype only; name suggests SSSE3 BGRA -> U,V (TODO confirm).
   1027                        int src_stride_bgra,
   1028                        uint8_t* dst_u,
   1029                        uint8_t* dst_v,
   1030                        int width);
   1031 void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0,  // Prototype only; name suggests SSSE3 ABGR -> U,V (TODO confirm).
   1032                        int src_stride_abgr,
   1033                        uint8_t* dst_u,
   1034                        uint8_t* dst_v,
   1035                        int width);
   1036 void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0,  // Prototype only; name suggests SSSE3 RGBA -> U,V (TODO confirm).
   1037                        int src_stride_rgba,
   1038                        uint8_t* dst_u,
   1039                        uint8_t* dst_v,
   1040                        int width);
   1041 void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr,  // Prototype only; the body is defined outside this header section. `_Any_` presumably marks the any-width wrapper - TODO confirm. Generic parameter names; format comes from the function name.
   1042                           int src_stride_ptr,  // presumably the byte offset to a second source row - TODO confirm
   1043                           uint8_t* dst_u,  // first writable destination
   1044                           uint8_t* dst_v,  // second writable destination
   1045                           int width);  // presumably the pixel count of the row - TODO confirm
   1046 void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGBToUVJRow_AVX2.
   1047                            int src_stride_ptr,
   1048                            uint8_t* dst_u,
   1049                            uint8_t* dst_v,
   1050                            int width);
   1051 void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGBToUVRow_SSSE3.
   1052                            int src_stride_ptr,
   1053                            uint8_t* dst_u,
   1054                            uint8_t* dst_v,
   1055                            int width);
   1056 void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGBToUVJRow_SSSE3.
   1057                             int src_stride_ptr,
   1058                             uint8_t* dst_u,
   1059                             uint8_t* dst_v,
   1060                             int width);
   1061 void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of BGRAToUVRow_SSSE3.
   1062                            int src_stride_ptr,
   1063                            uint8_t* dst_u,
   1064                            uint8_t* dst_v,
   1065                            int width);
   1066 void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ABGRToUVRow_SSSE3.
   1067                            int src_stride_ptr,
   1068                            uint8_t* dst_u,
   1069                            uint8_t* dst_v,
   1070                            int width);
   1071 void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of RGBAToUVRow_SSSE3.
   1072                            int src_stride_ptr,
   1073                            uint8_t* dst_u,
   1074                            uint8_t* dst_v,
   1075                            int width);
   1076 void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr,  // Prototype only; the body is defined outside this header section. `_Any_` presumably marks the any-width wrapper - TODO confirm. No stride parameter on the UV444 form.
   1077                              uint8_t* dst_u,  // first writable destination
   1078                              uint8_t* dst_v,  // second writable destination
   1079                              int width);  // presumably the pixel count of the row - TODO confirm
   1080 void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGBToUVRow_NEON.
   1081                           int src_stride_ptr,  // presumably the byte offset to a second source row - TODO confirm
   1082                           uint8_t* dst_u,
   1083                           uint8_t* dst_v,
   1084                           int width);
   1085 void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGBToUV444Row_MSA.
   1086                             uint8_t* dst_u,
   1087                             uint8_t* dst_v,
   1088                             int width);
   1089 void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGBToUVRow_MSA.
   1090                          int src_stride_ptr,
   1091                          uint8_t* dst_u,
   1092                          uint8_t* dst_v,
   1093                          int width);
   1094 void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGBToUVJRow_NEON.
   1095                            int src_stride_ptr,
   1096                            uint8_t* dst_u,
   1097                            uint8_t* dst_v,
   1098                            int width);
   1099 void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of BGRAToUVRow_NEON.
   1100                           int src_stride_ptr,
   1101                           uint8_t* dst_u,
   1102                           uint8_t* dst_v,
   1103                           int width);
   1104 void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ABGRToUVRow_NEON.
   1105                           int src_stride_ptr,
   1106                           uint8_t* dst_u,
   1107                           uint8_t* dst_v,
   1108                           int width);
   1109 void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of RGBAToUVRow_NEON.
   1110                           int src_stride_ptr,
   1111                           uint8_t* dst_u,
   1112                           uint8_t* dst_v,
   1113                           int width);
   1114 void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of RGB24ToUVRow_NEON.
   1115                            int src_stride_ptr,
   1116                            uint8_t* dst_u,
   1117                            uint8_t* dst_v,
   1118                            int width);
   1119 void RAWToUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of RAWToUVRow_NEON.
   1120                          int src_stride_ptr,
   1121                          uint8_t* dst_u,
   1122                          uint8_t* dst_v,
   1123                          int width);
   1124 void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of RGB565ToUVRow_NEON.
   1125                             int src_stride_ptr,
   1126                             uint8_t* dst_u,
   1127                             uint8_t* dst_v,
   1128                             int width);
   1129 void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGB1555ToUVRow_NEON.
   1130                               int src_stride_ptr,
   1131                               uint8_t* dst_u,
   1132                               uint8_t* dst_v,
   1133                               int width);
   1134 void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGB4444ToUVRow_NEON.
   1135                               int src_stride_ptr,
   1136                               uint8_t* dst_u,
   1137                               uint8_t* dst_v,
   1138                               int width);
   1139 void ARGBToUVJRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; the body is defined outside this header section. `_Any_` presumably marks the any-width wrapper around the MSA routine - TODO confirm.
   1140                           int src_stride_ptr,  // presumably the byte offset to a second source row - TODO confirm
   1141                           uint8_t* dst_u,  // first writable destination
   1142                           uint8_t* dst_v,  // second writable destination
   1143                           int width);  // presumably the pixel count of the row - TODO confirm
   1144 void BGRAToUVRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of BGRAToUVRow_MSA.
   1145                          int src_stride_ptr,
   1146                          uint8_t* dst_u,
   1147                          uint8_t* dst_v,
   1148                          int width);
   1149 void ABGRToUVRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ABGRToUVRow_MSA.
   1150                          int src_stride_ptr,
   1151                          uint8_t* dst_u,
   1152                          uint8_t* dst_v,
   1153                          int width);
   1154 void RGBAToUVRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of RGBAToUVRow_MSA.
   1155                          int src_stride_ptr,
   1156                          uint8_t* dst_u,
   1157                          uint8_t* dst_v,
   1158                          int width);
   1159 void RGB24ToUVRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of RGB24ToUVRow_MSA.
   1160                           int src_stride_ptr,
   1161                           uint8_t* dst_u,
   1162                           uint8_t* dst_v,
   1163                           int width);
   1164 void RAWToUVRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of RAWToUVRow_MSA.
   1165                         int src_stride_ptr,
   1166                         uint8_t* dst_u,
   1167                         uint8_t* dst_v,
   1168                         int width);
   1169 void RGB565ToUVRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of RGB565ToUVRow_MSA.
   1170                            int src_stride_ptr,
   1171                            uint8_t* dst_u,
   1172                            uint8_t* dst_v,
   1173                            int width);
   1174 void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of ARGB1555ToUVRow_MSA.
   1175                              int src_stride_ptr,
   1176                              uint8_t* dst_u,
   1177                              uint8_t* dst_v,
   1178                              int width);
   1179 void ARGBToUVRow_C(const uint8_t* src_rgb0,  // Prototype only; the body is defined outside this header section. `_C` presumably marks the portable C version of ARGB -> U,V (TODO confirm). Declared exactly once.
   1180                    int src_stride_rgb,  // presumably the byte offset to a second source row - TODO confirm
   1181                    uint8_t* dst_u,  // first writable destination
   1182                    uint8_t* dst_v,  // second writable destination
   1183                    int width);  // presumably the pixel count of the row - TODO confirm
   1184 void ARGBToUVJRow_C(const uint8_t* src_rgb0,  // Prototype only; "UVJ" presumably marks a JPEG-style chroma variant (TODO confirm). Declared exactly once.
   1185                     int src_stride_rgb,
   1186                     uint8_t* dst_u,
   1187                     uint8_t* dst_v,
   1188                     int width);
   1199 void BGRAToUVRow_C(const uint8_t* src_rgb0,  // Prototype only; the body is defined outside this header section. `_C` presumably marks the portable C version - TODO confirm. Source is named generically (src_rgb0); format comes from the function name.
   1200                    int src_stride_rgb,  // presumably the byte offset to a second source row - TODO confirm
   1201                    uint8_t* dst_u,  // first writable destination
   1202                    uint8_t* dst_v,  // second writable destination
   1203                    int width);  // presumably the pixel count of the row - TODO confirm
   1204 void ABGRToUVRow_C(const uint8_t* src_rgb0,  // Prototype only; name suggests ABGR -> U,V (TODO confirm).
   1205                    int src_stride_rgb,
   1206                    uint8_t* dst_u,
   1207                    uint8_t* dst_v,
   1208                    int width);
   1209 void RGBAToUVRow_C(const uint8_t* src_rgb0,  // Prototype only; name suggests RGBA -> U,V (TODO confirm).
   1210                    int src_stride_rgb,
   1211                    uint8_t* dst_u,
   1212                    uint8_t* dst_v,
   1213                    int width);
   1214 void RGB24ToUVRow_C(const uint8_t* src_rgb0,  // Prototype only; name suggests RGB24 -> U,V (TODO confirm).
   1215                     int src_stride_rgb,
   1216                     uint8_t* dst_u,
   1217                     uint8_t* dst_v,
   1218                     int width);
   1219 void RAWToUVRow_C(const uint8_t* src_rgb0,  // Prototype only; name suggests RAW -> U,V (TODO confirm).
   1220                   int src_stride_rgb,
   1221                   uint8_t* dst_u,
   1222                   uint8_t* dst_v,
   1223                   int width);
   1224 void RGB565ToUVRow_C(const uint8_t* src_rgb565,  // Prototype only; name suggests RGB565 -> U,V (TODO confirm). Format-specific parameter names from here on.
   1225                      int src_stride_rgb565,
   1226                      uint8_t* dst_u,
   1227                      uint8_t* dst_v,
   1228                      int width);
   1229 void ARGB1555ToUVRow_C(const uint8_t* src_argb1555,  // Prototype only; name suggests ARGB1555 -> U,V (TODO confirm).
   1230                        int src_stride_argb1555,
   1231                        uint8_t* dst_u,
   1232                        uint8_t* dst_v,
   1233                        int width);
   1234 void ARGB4444ToUVRow_C(const uint8_t* src_argb4444,  // Prototype only; name suggests ARGB4444 -> U,V (TODO confirm).
   1235                        int src_stride_argb4444,
   1236                        uint8_t* dst_u,
   1237                        uint8_t* dst_v,
   1238                        int width);
   1239 
   1240 void ARGBToUV444Row_SSSE3(const uint8_t* src_argb,  // Prototype only; the body is defined outside this header section. Name suggests SSSE3 ARGB -> separate U and V rows (TODO confirm). No stride parameter.
   1241                           uint8_t* dst_u,  // first writable destination
   1242                           uint8_t* dst_v,  // second writable destination
   1243                           int width);  // presumably the pixel count of the row - TODO confirm
   1244 void ARGBToUV444Row_Any_SSSE3(const uint8_t* src_ptr,  // Prototype only; `_Any_` presumably marks the any-width wrapper of the routine above - TODO confirm.
   1245                               uint8_t* dst_u,
   1246                               uint8_t* dst_v,
   1247                               int width);
   1248 
   1249 void ARGBToUV444Row_C(const uint8_t* src_argb,  // Prototype only; the body is defined outside this header section. `_C` presumably marks the portable C version of ARGB -> U,V 444 (TODO confirm).
   1250                       uint8_t* dst_u,  // first writable destination
   1251                       uint8_t* dst_v,  // second writable destination
   1252                       int width);  // presumably the pixel count of the row - TODO confirm
   1253 
   1254 void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);  // Prototype only (body defined elsewhere): read-only src, writable dst, int width. Name suggests an AVX2 routine that mirrors a row (TODO confirm).
   1255 void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);  // SSSE3-suffixed, same signature
   1256 void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);  // NEON-suffixed, same signature
   1257 void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);  // MSA-suffixed, same signature
   1258 void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width);  // `_C` presumably marks the portable C version - TODO confirm
   1259 void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // `_Any_` presumably marks the any-width wrapper - TODO confirm
   1260 void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1261 void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width);  // NOTE(review): no MirrorRow_SSE2 prototype appears in this run - confirm this declaration is still backed by a definition
   1262 void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1263 void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1264 
   1265 void MirrorUVRow_SSSE3(const uint8_t* src,  // Prototype only; the body is defined outside this header section. One read-only source and two destinations; name suggests mirroring a UV row into separate U and V outputs (TODO confirm). Source is spelled src here, src_uv in the siblings.
   1266                        uint8_t* dst_u,  // first writable destination
   1267                        uint8_t* dst_v,  // second writable destination
   1268                        int width);  // presumably a count of U/V pairs - TODO confirm units
   1269 void MirrorUVRow_NEON(const uint8_t* src_uv,  // Prototype only; NEON-suffixed, same parameter layout.
   1270                       uint8_t* dst_u,
   1271                       uint8_t* dst_v,
   1272                       int width);
   1273 void MirrorUVRow_MSA(const uint8_t* src_uv,  // Prototype only; MSA-suffixed, same parameter layout.
   1274                      uint8_t* dst_u,
   1275                      uint8_t* dst_v,
   1276                      int width);
   1277 void MirrorUVRow_C(const uint8_t* src_uv,  // Prototype only; `_C` presumably marks the portable C version - TODO confirm.
   1278                    uint8_t* dst_u,
   1279                    uint8_t* dst_v,
   1280                    int width);
   1281 
   1282 void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width);  // Prototype only (body defined elsewhere): read-only src, writable dst, int width. Name suggests an AVX2 routine that mirrors a row of ARGB pixels (TODO confirm).
   1283 void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width);  // SSE2-suffixed, same signature
   1284 void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width);  // NEON-suffixed, same signature
   1285 void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width);  // MSA-suffixed, same signature
   1286 void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width);  // `_C` presumably marks the portable C version - TODO confirm
   1287 void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr,  // `_Any_` presumably marks the any-width wrapper - TODO confirm
   1288                             uint8_t* dst_ptr,
   1289                             int width);
   1290 void ARGBMirrorRow_Any_SSE2(const uint8_t* src_ptr,  // `_Any_` form of ARGBMirrorRow_SSE2
   1291                             uint8_t* dst_ptr,
   1292                             int width);
   1293 void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr,  // `_Any_` form of ARGBMirrorRow_NEON
   1294                             uint8_t* dst_ptr,
   1295                             int width);
   1296 void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);  // `_Any_` form of ARGBMirrorRow_MSA
   1297 
   1298 void SplitUVRow_C(const uint8_t* src_uv,  // Prototype only; the body is defined outside this header section. One read-only source and two destinations; name suggests splitting a UV row into separate U and V outputs (TODO confirm).
   1299                   uint8_t* dst_u,  // first writable destination
   1300                   uint8_t* dst_v,  // second writable destination
   1301                   int width);  // presumably a count of U/V pairs - TODO confirm units
   1302 void SplitUVRow_SSE2(const uint8_t* src_uv,  // Prototype only; SSE2-suffixed, same parameter layout.
   1303                      uint8_t* dst_u,
   1304                      uint8_t* dst_v,
   1305                      int width);
   1306 void SplitUVRow_AVX2(const uint8_t* src_uv,  // Prototype only; AVX2-suffixed, same parameter layout.
   1307                      uint8_t* dst_u,
   1308                      uint8_t* dst_v,
   1309                      int width);
   1310 void SplitUVRow_NEON(const uint8_t* src_uv,  // Prototype only; NEON-suffixed, same parameter layout.
   1311                      uint8_t* dst_u,
   1312                      uint8_t* dst_v,
   1313                      int width);
   1314 void SplitUVRow_MSA(const uint8_t* src_uv,  // Prototype only; MSA-suffixed, same parameter layout.
   1315                     uint8_t* dst_u,
   1316                     uint8_t* dst_v,
   1317                     int width);
   1318 void SplitUVRow_Any_SSE2(const uint8_t* src_ptr,  // Prototype only; `_Any_` presumably marks the any-width wrapper - TODO confirm.
   1319                          uint8_t* dst_u,
   1320                          uint8_t* dst_v,
   1321                          int width);
   1322 void SplitUVRow_Any_AVX2(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of SplitUVRow_AVX2.
   1323                          uint8_t* dst_u,
   1324                          uint8_t* dst_v,
   1325                          int width);
   1326 void SplitUVRow_Any_NEON(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of SplitUVRow_NEON.
   1327                          uint8_t* dst_u,
   1328                          uint8_t* dst_v,
   1329                          int width);
   1330 void SplitUVRow_Any_MSA(const uint8_t* src_ptr,  // Prototype only; `_Any_` form of SplitUVRow_MSA.
   1331                         uint8_t* dst_u,
   1332                         uint8_t* dst_v,
   1333                         int width);
   1334 
   1335 void MergeUVRow_C(const uint8_t* src_u,  // Prototype only; the body is defined outside this header section. Two read-only sources and one destination; name suggests merging U and V rows into one UV row (TODO confirm).
   1336                   const uint8_t* src_v,  // second read-only source
   1337                   uint8_t* dst_uv,  // writable destination
   1338                   int width);  // presumably a count of U/V pairs - TODO confirm units
   1339 void MergeUVRow_SSE2(const uint8_t* src_u,  // Prototype only; SSE2-suffixed, same parameter layout.
   1340                      const uint8_t* src_v,
   1341                      uint8_t* dst_uv,
   1342                      int width);
   1343 void MergeUVRow_AVX2(const uint8_t* src_u,  // Prototype only; AVX2-suffixed, same parameter layout.
   1344                      const uint8_t* src_v,
   1345                      uint8_t* dst_uv,
   1346                      int width);
   1347 void MergeUVRow_NEON(const uint8_t* src_u,  // Prototype only; NEON-suffixed, same parameter layout.
   1348                      const uint8_t* src_v,
   1349                      uint8_t* dst_uv,
   1350                      int width);
   1351 void MergeUVRow_MSA(const uint8_t* src_u,  // Prototype only; MSA-suffixed, same parameter layout.
   1352                     const uint8_t* src_v,
   1353                     uint8_t* dst_uv,
   1354                     int width);
   1355 void MergeUVRow_Any_SSE2(const uint8_t* y_buf,  // Prototype only; `_Any_` presumably marks the any-width wrapper - TODO confirm. NOTE(review): y_buf/uv_buf sit where the non-Any prototypes take src_u/src_v - presumably the same inputs under generic names; confirm.
   1356                          const uint8_t* uv_buf,
   1357                          uint8_t* dst_ptr,
   1358                          int width);
   1359 void MergeUVRow_Any_AVX2(const uint8_t* y_buf,  // Prototype only; `_Any_` form of MergeUVRow_AVX2 (same y_buf/uv_buf naming).
   1360                          const uint8_t* uv_buf,
   1361                          uint8_t* dst_ptr,
   1362                          int width);
   1363 void MergeUVRow_Any_NEON(const uint8_t* y_buf,  // Prototype only; `_Any_` form of MergeUVRow_NEON (same y_buf/uv_buf naming).
   1364                          const uint8_t* uv_buf,
   1365                          uint8_t* dst_ptr,
   1366                          int width);
   1367 void MergeUVRow_Any_MSA(const uint8_t* y_buf,  // Prototype only; `_Any_` form of MergeUVRow_MSA (same y_buf/uv_buf naming).
   1368                         const uint8_t* uv_buf,
   1369                         uint8_t* dst_ptr,
   1370                         int width);
   1371 
   // SplitRGBRow prototypes (_C, _SSSE3, _NEON and the _Any_ forms). Each
   // takes one const uint8_t source row, three uint8_t destination rows
   // (dst_r, dst_g, dst_b) and an int width.
   // NOTE(review): presumably splits packed RGB into three planar rows --
   // confirm against the definitions.
   1372 void SplitRGBRow_C(const uint8_t* src_rgb,
   1373                    uint8_t* dst_r,
   1374                    uint8_t* dst_g,
   1375                    uint8_t* dst_b,
   1376                    int width);
   1377 void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
   1378                        uint8_t* dst_r,
   1379                        uint8_t* dst_g,
   1380                        uint8_t* dst_b,
   1381                        int width);
   1382 void SplitRGBRow_NEON(const uint8_t* src_rgb,
   1383                       uint8_t* dst_r,
   1384                       uint8_t* dst_g,
   1385                       uint8_t* dst_b,
   1386                       int width);
   1387 void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr,
   1388                            uint8_t* dst_r,
   1389                            uint8_t* dst_g,
   1390                            uint8_t* dst_b,
   1391                            int width);
   1392 void SplitRGBRow_Any_NEON(const uint8_t* src_ptr,
   1393                           uint8_t* dst_r,
   1394                           uint8_t* dst_g,
   1395                           uint8_t* dst_b,
   1396                           int width);
   1397 
   // MergeRGBRow prototypes (_C, _SSSE3, _NEON and the _Any_ forms). Each
   // takes three const uint8_t source rows, one uint8_t destination row and
   // an int width.
   // NOTE(review): MergeRGBRow_Any_SSSE3 names its inputs y_buf/u_buf/v_buf
   // while MergeRGBRow_Any_NEON uses src_r/src_g/src_b. The types are
   // identical, so this is naming only -- check the definitions before
   // unifying.
   1398 void MergeRGBRow_C(const uint8_t* src_r,
   1399                    const uint8_t* src_g,
   1400                    const uint8_t* src_b,
   1401                    uint8_t* dst_rgb,
   1402                    int width);
   1403 void MergeRGBRow_SSSE3(const uint8_t* src_r,
   1404                        const uint8_t* src_g,
   1405                        const uint8_t* src_b,
   1406                        uint8_t* dst_rgb,
   1407                        int width);
   1408 void MergeRGBRow_NEON(const uint8_t* src_r,
   1409                       const uint8_t* src_g,
   1410                       const uint8_t* src_b,
   1411                       uint8_t* dst_rgb,
   1412                       int width);
   1413 void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf,
   1414                            const uint8_t* u_buf,
   1415                            const uint8_t* v_buf,
   1416                            uint8_t* dst_ptr,
   1417                            int width);
   1418 void MergeRGBRow_Any_NEON(const uint8_t* src_r,
   1419                           const uint8_t* src_g,
   1420                           const uint8_t* src_b,
   1421                           uint8_t* dst_rgb,
   1422                           int width);
   1423 
   // MergeUVRow_16 prototypes (_C and _AVX2): the uint16_t counterpart of
   // MergeUVRow, with an extra int `scale` argument ahead of `width`.
   // The inline comment on the C prototype gives 64 as the scale for 10-bit
   // input.
   1424 void MergeUVRow_16_C(const uint16_t* src_u,
   1425                      const uint16_t* src_v,
   1426                      uint16_t* dst_uv,
   1427                      int scale, /* 64 for 10 bit */
   1428                      int width);
   1429 void MergeUVRow_16_AVX2(const uint16_t* src_u,
   1430                         const uint16_t* src_v,
   1431                         uint16_t* dst_uv,
   1432                         int scale,
   1433                         int width);
   1434 
   // MultiplyRow_16 prototypes (_AVX2 and _C). Each takes a const uint16_t
   // source row, a uint16_t destination row, an int scale and an int width.
   // NOTE(review): presumably multiplies each sample by `scale` -- confirm
   // against the definitions.
   1435 void MultiplyRow_16_AVX2(const uint16_t* src_y,
   1436                          uint16_t* dst_y,
   1437                          int scale,
   1438                          int width);
   1439 void MultiplyRow_16_C(const uint16_t* src_y,
   1440                       uint16_t* dst_y,
   1441                       int scale,
   1442                       int width);
   1443 
   // Convert8To16Row prototypes (_C, _SSE2, _AVX2 and the _Any_ forms). Each
   // takes a const uint8_t source row, a uint16_t destination row, an int
   // scale and an int width.
   // NOTE(review): how `scale` maps 8-bit input to the 16-bit output is not
   // visible in this header -- see the definitions.
   1444 void Convert8To16Row_C(const uint8_t* src_y,
   1445                        uint16_t* dst_y,
   1446                        int scale,
   1447                        int width);
   1448 void Convert8To16Row_SSE2(const uint8_t* src_y,
   1449                           uint16_t* dst_y,
   1450                           int scale,
   1451                           int width);
   1452 void Convert8To16Row_AVX2(const uint8_t* src_y,
   1453                           uint16_t* dst_y,
   1454                           int scale,
   1455                           int width);
   1456 void Convert8To16Row_Any_SSE2(const uint8_t* src_ptr,
   1457                               uint16_t* dst_ptr,
   1458                               int scale,
   1459                               int width);
   1460 void Convert8To16Row_Any_AVX2(const uint8_t* src_ptr,
   1461                               uint16_t* dst_ptr,
   1462                               int scale,
   1463                               int width);
   1464 
   // Convert16To8Row prototypes (_C, _SSSE3, _AVX2 and the _Any_ forms). Each
   // takes a const uint16_t source row, a uint8_t destination row, an int
   // scale and an int width.
   // NOTE(review): how `scale` maps 16-bit input to the 8-bit output is not
   // visible in this header -- see the definitions.
   1465 void Convert16To8Row_C(const uint16_t* src_y,
   1466                        uint8_t* dst_y,
   1467                        int scale,
   1468                        int width);
   1469 void Convert16To8Row_SSSE3(const uint16_t* src_y,
   1470                            uint8_t* dst_y,
   1471                            int scale,
   1472                            int width);
   1473 void Convert16To8Row_AVX2(const uint16_t* src_y,
   1474                           uint8_t* dst_y,
   1475                           int scale,
   1476                           int width);
   1477 void Convert16To8Row_Any_SSSE3(const uint16_t* src_ptr,
   1478                                uint8_t* dst_ptr,
   1479                                int scale,
   1480                                int width);
   1481 void Convert16To8Row_Any_AVX2(const uint16_t* src_ptr,
   1482                               uint8_t* dst_ptr,
   1483                               int scale,
   1484                               int width);
   1485 
   // CopyRow prototypes (_SSE2, _AVX, _ERMS, _NEON, _MIPS, _C and the _Any_
   // forms). Each takes a const uint8_t source, a uint8_t destination and an
   // int length.
   // NOTE(review): the _MIPS and _C prototypes call the length `count`; the
   // others call it `width`. The type is int in both cases.
   1486 void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
   1487 void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width);
   1488 void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width);
   1489 void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width);
   1490 void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count);
   1491 void CopyRow_C(const uint8_t* src, uint8_t* dst, int count);
   1492 void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1493 void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1494 void CopyRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1495 
   // CopyRow_16_C: uint16_t counterpart of CopyRow_C. Only a _C prototype is
   // declared in this section.
   1496 void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count);
   1497 
   // ARGBCopyAlphaRow prototypes (_C, _SSE2, _AVX2 and the _Any_ forms). Each
   // takes a const uint8_t source, a uint8_t destination and an int width.
   // NOTE(review): presumably copies only the alpha channel from one ARGB
   // row into another -- confirm against the definitions.
   1498 void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
   1499 void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
   1500 void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
   1501 void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr,
   1502                                uint8_t* dst_ptr,
   1503                                int width);
   1504 void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr,
   1505                                uint8_t* dst_ptr,
   1506                                int width);
   1507 
   // ARGBExtractAlphaRow prototypes (_C, _SSE2, _AVX2, _NEON, _MSA and the
   // _Any_ forms). Each takes a const uint8_t source row (src_argb), a
   // uint8_t destination row (dst_a) and an int width.
   // NOTE(review): presumably writes one alpha byte per ARGB pixel --
   // confirm against the definitions.
   1508 void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width);
   1509 void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
   1510                               uint8_t* dst_a,
   1511                               int width);
   1512 void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb,
   1513                               uint8_t* dst_a,
   1514                               int width);
   1515 void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb,
   1516                               uint8_t* dst_a,
   1517                               int width);
   1518 void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb,
   1519                              uint8_t* dst_a,
   1520                              int width);
   1521 void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr,
   1522                                   uint8_t* dst_ptr,
   1523                                   int width);
   1524 void ARGBExtractAlphaRow_Any_AVX2(const uint8_t* src_ptr,
   1525                                   uint8_t* dst_ptr,
   1526                                   int width);
   1527 void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr,
   1528                                   uint8_t* dst_ptr,
   1529                                   int width);
   1530 void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr,
   1531                                  uint8_t* dst_ptr,
   1532                                  int width);
   1533 
   // ARGBCopyYToAlphaRow prototypes (_C, _SSE2, _AVX2 and the _Any_ forms).
   // Each takes a const uint8_t source, a uint8_t destination and an int
   // width.
   // NOTE(review): presumably writes a row of Y samples into the alpha
   // channel of an ARGB row -- confirm against the definitions.
   1534 void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width);
   1535 void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
   1536 void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
   1537 void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr,
   1538                                   uint8_t* dst_ptr,
   1539                                   int width);
   1540 void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr,
   1541                                   uint8_t* dst_ptr,
   1542                                   int width);
   1543 
   // SetRow prototypes (_C, _MSA, _X86, _ERMS, _NEON and the _Any_ forms).
   // Each takes a uint8_t destination, a uint8_t value and an int width.
   // NOTE(review): the _Any_ prototypes name the uint8_t value `v32` while
   // the others use `v8`. The type is uint8_t throughout; the name presumably
   // mirrors the definitions -- confirm before renaming.
   1544 void SetRow_C(uint8_t* dst, uint8_t v8, int width);
   1545 void SetRow_MSA(uint8_t* dst, uint8_t v8, int width);
   1546 void SetRow_X86(uint8_t* dst, uint8_t v8, int width);
   1547 void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width);
   1548 void SetRow_NEON(uint8_t* dst, uint8_t v8, int width);
   1549 void SetRow_Any_X86(uint8_t* dst_ptr, uint8_t v32, int width);
   1550 void SetRow_Any_NEON(uint8_t* dst_ptr, uint8_t v32, int width);
   1551 
   // ARGBSetRow prototypes (_C, _X86, _NEON, _MSA and two _Any_ forms). Each
   // takes a uint8_t destination, a uint32_t value (v32) and an int width.
   // NOTE(review): presumably stores one 32-bit value per ARGB pixel --
   // confirm against the definitions.
   1552 void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width);
   1553 void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width);
   1554 void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width);
   1555 void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width);
   1556 void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width);
   1557 void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width);
   1558 
   1559 // ARGBShufflers for BGRAToARGB etc.
   // Prototypes for _C, _SSSE3, _AVX2, _NEON, _MSA and the _Any_ forms. Each
   // takes a const uint8_t source row, a uint8_t destination row, a const
   // uint8_t table (`shuffler`, called `param` in the _Any_ forms) and an
   // int width.
   // NOTE(review): the layout and length of the shuffler table are not
   // visible in this header -- see the definitions.
   1560 void ARGBShuffleRow_C(const uint8_t* src_argb,
   1561                       uint8_t* dst_argb,
   1562                       const uint8_t* shuffler,
   1563                       int width);
   1564 void ARGBShuffleRow_SSSE3(const uint8_t* src_argb,
   1565                           uint8_t* dst_argb,
   1566                           const uint8_t* shuffler,
   1567                           int width);
   1568 void ARGBShuffleRow_AVX2(const uint8_t* src_argb,
   1569                          uint8_t* dst_argb,
   1570                          const uint8_t* shuffler,
   1571                          int width);
   1572 void ARGBShuffleRow_NEON(const uint8_t* src_argb,
   1573                          uint8_t* dst_argb,
   1574                          const uint8_t* shuffler,
   1575                          int width);
   1576 void ARGBShuffleRow_MSA(const uint8_t* src_argb,
   1577                         uint8_t* dst_argb,
   1578                         const uint8_t* shuffler,
   1579                         int width);
   1580 void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr,
   1581                               uint8_t* dst_ptr,
   1582                               const uint8_t* param,
   1583                               int width);
   1584 void ARGBShuffleRow_Any_AVX2(const uint8_t* src_ptr,
   1585                              uint8_t* dst_ptr,
   1586                              const uint8_t* param,
   1587                              int width);
   1588 void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr,
   1589                              uint8_t* dst_ptr,
   1590                              const uint8_t* param,
   1591                              int width);
   1592 void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr,
   1593                             uint8_t* dst_ptr,
   1594                             const uint8_t* param,
   1595                             int width);
   1596 
   // SSSE3, SSE2 and AVX2 prototypes for row converters from RGB24, RAW,
   // RGB565, ARGB1555 and ARGB4444; RAWToRGB24Row targets RGB24, the rest
   // target ARGB. Every prototype has the same shape: const uint8_t source
   // row, uint8_t destination row, int width.
   // NOTE(review): `width` is presumably a pixel count, not a byte count --
   // confirm against the definitions.
   1597 void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24,
   1598                           uint8_t* dst_argb,
   1599                           int width);
   1600 void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width);
   1601 void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
   1602 void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
   1603 void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
   1604 void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width);
   1605 void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565,
   1606                           uint8_t* dst_argb,
   1607                           int width);
   1608 void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555,
   1609                             uint8_t* dst_argb,
   1610                             int width);
   1611 void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444,
   1612                             uint8_t* dst_argb,
   1613                             int width);
   1614 
   // NEON, MSA and C prototypes for the same packed-format row converters
   // (RGB24, RAW, RGB565, ARGB1555, ARGB4444), followed by C-only prototypes
   // for AR30ToARGBRow, AR30ToABGRRow, ARGBToAR30Row and AR30ToAB30Row.
   // Every prototype takes a const uint8_t source row, a uint8_t destination
   // row and an int width.
   // NOTE(review): ARGBToAR30Row_C is declared again with the other ARGBTo*
   // C prototypes further down this header. The repeat is harmless but
   // redundant.
   1615 void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24,
   1616                          uint8_t* dst_argb,
   1617                          int width);
   1618 void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
   1619 void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width);
   1620 void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width);
   1621 void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
   1622 void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
   1623 void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565,
   1624                           uint8_t* dst_argb,
   1625                           int width);
   1626 void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565,
   1627                          uint8_t* dst_argb,
   1628                          int width);
   1629 void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555,
   1630                             uint8_t* dst_argb,
   1631                             int width);
   1632 void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555,
   1633                            uint8_t* dst_argb,
   1634                            int width);
   1635 void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444,
   1636                             uint8_t* dst_argb,
   1637                             int width);
   1638 void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444,
   1639                            uint8_t* dst_argb,
   1640                            int width);
   1641 void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width);
   1642 void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width);
   1643 void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width);
   1644 void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width);
   1645 void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555,
   1646                          uint8_t* dst_argb,
   1647                          int width);
   1648 void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444,
   1649                          uint8_t* dst_argb,
   1650                          int width);
   1651 void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width);
   1652 void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width);
   1653 void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
   1654 void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width);
   1655 
   // _Any_SSSE3 prototypes for RGB24ToARGBRow, RAWToARGBRow and
   // RAWToRGB24Row. Same shape as the plain SSSE3 prototypes, with generic
   // src_ptr/dst_ptr parameter names.
   1656 void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
   1657                               uint8_t* dst_ptr,
   1658                               int width);
   1659 void RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
   1660                             uint8_t* dst_ptr,
   1661                             int width);
   1662 void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr,
   1663                              uint8_t* dst_ptr,
   1664                              int width);
   1665 
   // _Any_SSE2 and _Any_AVX2 prototypes for RGB565ToARGBRow,
   // ARGB1555ToARGBRow and ARGB4444ToARGBRow. Each takes a const uint8_t
   // source row, a uint8_t destination row and an int width.
   1666 void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
   1667                               uint8_t* dst_ptr,
   1668                               int width);
   1669 void ARGB1555ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
   1670                                 uint8_t* dst_ptr,
   1671                                 int width);
   1672 void ARGB4444ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
   1673                                 uint8_t* dst_ptr,
   1674                                 int width);
   1675 void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
   1676                               uint8_t* dst_ptr,
   1677                               int width);
   1678 void ARGB1555ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
   1679                                 uint8_t* dst_ptr,
   1680                                 int width);
   1681 void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
   1682                                 uint8_t* dst_ptr,
   1683                                 int width);
   1684 
   // _Any_NEON and _Any_MSA prototypes for the packed-format row converters
   // (RGB24, RAW, RGB565, ARGB1555, ARGB4444). Each takes a const uint8_t
   // source row, a uint8_t destination row and an int width.
   // ARGB4444ToARGBRow_Any_MSA follows separately after a blank line.
   1685 void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr,
   1686                              uint8_t* dst_ptr,
   1687                              int width);
   1688 void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr,
   1689                             uint8_t* dst_ptr,
   1690                             int width);
   1691 void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1692 void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1693 void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr,
   1694                             uint8_t* dst_ptr,
   1695                             int width);
   1696 void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1697 void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr,
   1698                               uint8_t* dst_ptr,
   1699                               int width);
   1700 void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr,
   1701                              uint8_t* dst_ptr,
   1702                              int width);
   1703 void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr,
   1704                                 uint8_t* dst_ptr,
   1705                                 int width);
   1706 void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr,
   1707                                uint8_t* dst_ptr,
   1708                                int width);
   1709 void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr,
   1710                                 uint8_t* dst_ptr,
   1711                                 int width);
   1712 
   // _Any_MSA counterpart of ARGB4444ToARGBRow_Any_NEON, with an identical
   // parameter list.
   1713 void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr,
   1714                                uint8_t* dst_ptr,
   1715                                int width);
   1716 
   // SSSE3 and SSE2 prototypes for row converters from ARGB to RGB24, RAW,
   // RGB565, ARGB1555, ARGB4444 and AR30, plus ABGRToAR30Row_SSSE3. Each
   // takes a const uint8_t source row, a uint8_t destination row and an int
   // width.
   1717 void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
   1718 void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width);
   1719 void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
   1720 void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
   1721 void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width);
   1722 void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
   1723 void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width);
   1724 
   // AVX2 prototypes for ARGBToRAWRow and ARGBToRGB24Row, with the same
   // parameter list as the SSSE3 versions.
   1725 void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width);
   1726 void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
   1727 
   // AVX512VBMI prototype for ARGBToRGB24Row, with the same parameter list
   // as the SSSE3 and AVX2 versions.
   1728 void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width);
   1729 
   // ARGBToRGB565DitherRow prototypes (_C, _SSE2, _AVX2). Each takes a const
   // uint8_t source row, a uint8_t destination row, a uint32_t `dither4`
   // value and an int width.
   // NOTE(review): `dither4` presumably packs four 8-bit dither values into
   // one word -- confirm against the definitions.
   1730 void ARGBToRGB565DitherRow_C(const uint8_t* src_argb,
   1731                              uint8_t* dst_rgb,
   1732                              const uint32_t dither4,
   1733                              int width);
   1734 void ARGBToRGB565DitherRow_SSE2(const uint8_t* src,
   1735                                 uint8_t* dst,
   1736                                 const uint32_t dither4,
   1737                                 int width);
   1738 void ARGBToRGB565DitherRow_AVX2(const uint8_t* src,
   1739                                 uint8_t* dst,
   1740                                 const uint32_t dither4,
   1741                                 int width);
   1742 
   // AVX2 prototypes for ARGBToRGB565Row, ARGBToARGB1555Row,
   // ARGBToARGB4444Row, ABGRToAR30Row and ARGBToAR30Row. Each takes a const
   // uint8_t source row, a uint8_t destination row and an int width.
   1743 void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1744 void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb,
   1745                             uint8_t* dst_rgb,
   1746                             int width);
   1747 void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb,
   1748                             uint8_t* dst_rgb,
   1749                             int width);
   1750 void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
   1751 void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width);
   1752 
   // NEON and MSA prototypes for row converters from ARGB to RGB24, RAW,
   // RGB565, ARGB1555 and ARGB4444, plus the dithered RGB565 form. The
   // dithered prototypes add a uint32_t `dither4` argument; the rest take a
   // const uint8_t source row, a uint8_t destination row and an int width.
   // The NEON prototypes name the destination after its format (dst_rgb24,
   // dst_raw, ...); the MSA prototypes call it dst_rgb throughout.
   1753 void ARGBToRGB24Row_NEON(const uint8_t* src_argb,
   1754                          uint8_t* dst_rgb24,
   1755                          int width);
   1756 void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width);
   1757 void ARGBToRGB565Row_NEON(const uint8_t* src_argb,
   1758                           uint8_t* dst_rgb565,
   1759                           int width);
   1760 void ARGBToARGB1555Row_NEON(const uint8_t* src_argb,
   1761                             uint8_t* dst_argb1555,
   1762                             int width);
   1763 void ARGBToARGB4444Row_NEON(const uint8_t* src_argb,
   1764                             uint8_t* dst_argb4444,
   1765                             int width);
   1766 void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb,
   1767                                 uint8_t* dst_rgb,
   1768                                 const uint32_t dither4,
   1769                                 int width);
   1770 void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1771 void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1772 void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1773 void ARGBToARGB1555Row_MSA(const uint8_t* src_argb,
   1774                            uint8_t* dst_rgb,
   1775                            int width);
   1776 void ARGBToARGB4444Row_MSA(const uint8_t* src_argb,
   1777                            uint8_t* dst_rgb,
   1778                            int width);
   1779 void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb,
   1780                                uint8_t* dst_rgb,
   1781                                const uint32_t dither4,
   1782                                int width);
   1783 
   // C prototypes for row converters from ARGB to RGBA, RGB24, RAW, RGB565,
   // ARGB1555, ARGB4444 and AR30, plus ABGRToAR30Row_C. Each takes a const
   // uint8_t source row, a uint8_t destination row and an int width.
   // NOTE(review): ARGBToAR30Row_C is also declared earlier in this header
   // alongside the AR30To* C prototypes. The signatures match, so the repeat
   // is harmless but redundant.
   1784 void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1785 void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1786 void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1787 void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1788 void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1789 void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width);
   1790 void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width);
   1791 void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width);
   1792 
   // J400ToARGBRow prototypes (_SSE2, _AVX2, _NEON, _MSA, _C and the _Any_
   // forms). Each takes a const uint8_t source row (src_y), a uint8_t
   // destination row (dst_argb) and an int width.
   // NOTE(review): J400 is presumably a single-plane full-range grey format
   // -- confirm against the library documentation.
   1793 void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width);
   1794 void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width);
   1795 void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
   1796 void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
   1797 void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width);
   1798 void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
   1799                             uint8_t* dst_ptr,
   1800                             int width);
   1801 void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
   1802                             uint8_t* dst_ptr,
   1803                             int width);
   1804 void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
   1805                             uint8_t* dst_ptr,
   1806                             int width);
   1807 void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   1808 
   // C prototypes for the YUV-to-RGB row converters. All of them take a
   // `const struct YuvConstants*` and an int width as their last two
   // arguments. The source arguments vary by input format:
   //   - I444/I422/I210: three source rows (src_y, src_u, src_v); the I210
   //     forms take uint16_t sources, the others uint8_t.
   //   - I422Alpha: the three rows above plus a fourth row, src_a.
   //   - NV12/NV21: two source rows (src_y plus src_uv or src_vu).
   //   - YUY2/UYVY: a single source row.
   // The destination is always a uint8_t row.
   // NOTE(review): the contents of YuvConstants are not visible in this
   // section -- see its declaration elsewhere in the header.
   1809 void I444ToARGBRow_C(const uint8_t* src_y,
   1810                      const uint8_t* src_u,
   1811                      const uint8_t* src_v,
   1812                      uint8_t* rgb_buf,
   1813                      const struct YuvConstants* yuvconstants,
   1814                      int width);
   1815 void I422ToARGBRow_C(const uint8_t* src_y,
   1816                      const uint8_t* src_u,
   1817                      const uint8_t* src_v,
   1818                      uint8_t* rgb_buf,
   1819                      const struct YuvConstants* yuvconstants,
   1820                      int width);
   1821 void I422ToAR30Row_C(const uint8_t* src_y,
   1822                      const uint8_t* src_u,
   1823                      const uint8_t* src_v,
   1824                      uint8_t* rgb_buf,
   1825                      const struct YuvConstants* yuvconstants,
   1826                      int width);
   1827 void I210ToAR30Row_C(const uint16_t* src_y,
   1828                      const uint16_t* src_u,
   1829                      const uint16_t* src_v,
   1830                      uint8_t* rgb_buf,
   1831                      const struct YuvConstants* yuvconstants,
   1832                      int width);
   1833 void I210ToARGBRow_C(const uint16_t* src_y,
   1834                      const uint16_t* src_u,
   1835                      const uint16_t* src_v,
   1836                      uint8_t* rgb_buf,
   1837                      const struct YuvConstants* yuvconstants,
   1838                      int width);
   1839 void I422AlphaToARGBRow_C(const uint8_t* src_y,
   1840                           const uint8_t* src_u,
   1841                           const uint8_t* src_v,
   1842                           const uint8_t* src_a,
   1843                           uint8_t* rgb_buf,
   1844                           const struct YuvConstants* yuvconstants,
   1845                           int width);
   1846 void NV12ToARGBRow_C(const uint8_t* src_y,
   1847                      const uint8_t* src_uv,
   1848                      uint8_t* rgb_buf,
   1849                      const struct YuvConstants* yuvconstants,
   1850                      int width);
   1851 void NV12ToRGB565Row_C(const uint8_t* src_y,
   1852                        const uint8_t* src_uv,
   1853                        uint8_t* dst_rgb565,
   1854                        const struct YuvConstants* yuvconstants,
   1855                        int width);
   1856 void NV21ToARGBRow_C(const uint8_t* src_y,
   1857                      const uint8_t* src_vu,
   1858                      uint8_t* rgb_buf,
   1859                      const struct YuvConstants* yuvconstants,
   1860                      int width);
   1861 void NV12ToRGB24Row_C(const uint8_t* src_y,
   1862                       const uint8_t* src_uv,
   1863                       uint8_t* rgb_buf,
   1864                       const struct YuvConstants* yuvconstants,
   1865                       int width);
   1866 void NV21ToRGB24Row_C(const uint8_t* src_y,
   1867                       const uint8_t* src_vu,
   1868                       uint8_t* rgb_buf,
   1869                       const struct YuvConstants* yuvconstants,
   1870                       int width);
   1871 void YUY2ToARGBRow_C(const uint8_t* src_yuy2,
   1872                      uint8_t* rgb_buf,
   1873                      const struct YuvConstants* yuvconstants,
   1874                      int width);
   1875 void UYVYToARGBRow_C(const uint8_t* src_uyvy,
   1876                      uint8_t* rgb_buf,
   1877                      const struct YuvConstants* yuvconstants,
   1878                      int width);
   1879 void I422ToRGBARow_C(const uint8_t* src_y,
   1880                      const uint8_t* src_u,
   1881                      const uint8_t* src_v,
   1882                      uint8_t* rgb_buf,
   1883                      const struct YuvConstants* yuvconstants,
   1884                      int width);
   1885 void I422ToRGB24Row_C(const uint8_t* src_y,
   1886                       const uint8_t* src_u,
   1887                       const uint8_t* src_v,
   1888                       uint8_t* rgb_buf,
   1889                       const struct YuvConstants* yuvconstants,
   1890                       int width);
   1891 void I422ToARGB4444Row_C(const uint8_t* src_y,
   1892                          const uint8_t* src_u,
   1893                          const uint8_t* src_v,
   1894                          uint8_t* dst_argb4444,
   1895                          const struct YuvConstants* yuvconstants,
   1896                          int width);
   1897 void I422ToARGB1555Row_C(const uint8_t* src_y,
   1898                          const uint8_t* src_u,
   1899                          const uint8_t* src_v,
   1900                          uint8_t* dst_argb1555,
   1901                          const struct YuvConstants* yuvconstants,
   1902                          int width);
   1903 void I422ToRGB565Row_C(const uint8_t* src_y,
   1904                        const uint8_t* src_u,
   1905                        const uint8_t* src_v,
   1906                        uint8_t* dst_rgb565,
   1907                        const struct YuvConstants* yuvconstants,
   1908                        int width);
   // AVX2 prototypes for I422ToARGBRow and I422ToRGBARow. Each takes three
   // const uint8_t source rows (y_buf, u_buf, v_buf), a uint8_t destination
   // row, a YuvConstants pointer and an int width.
   // NOTE(review): the RGBA variant also names its destination dst_argb;
   // that name presumably mirrors the definition.
   1909 void I422ToARGBRow_AVX2(const uint8_t* y_buf,
   1910                         const uint8_t* u_buf,
   1911                         const uint8_t* v_buf,
   1912                         uint8_t* dst_argb,
   1913                         const struct YuvConstants* yuvconstants,
   1914                         int width);
   1915 void I422ToRGBARow_AVX2(const uint8_t* y_buf,
   1916                         const uint8_t* u_buf,
   1917                         const uint8_t* v_buf,
   1918                         uint8_t* dst_argb,
   1919                         const struct YuvConstants* yuvconstants,
   1920                         int width);
   // SSSE3 and AVX2 prototypes for I444ToARGBRow. Each takes three const
   // uint8_t source rows (y_buf, u_buf, v_buf), a uint8_t destination row, a
   // YuvConstants pointer and an int width. Each variant is declared exactly
   // once: a repeated identical prototype is legal but redundant and trips
   // -Wredundant-decls.
   1921 void I444ToARGBRow_SSSE3(const uint8_t* y_buf,
   1922                          const uint8_t* u_buf,
   1923                          const uint8_t* v_buf,
   1924                          uint8_t* dst_argb,
   1925                          const struct YuvConstants* yuvconstants,
   1926                          int width);
   1927 void I444ToARGBRow_AVX2(const uint8_t* y_buf,
   1928                         const uint8_t* u_buf,
   1929                         const uint8_t* v_buf,
   1930                         uint8_t* dst_argb,
   1931                         const struct YuvConstants* yuvconstants,
   1932                         int width);
   // Prototype of the SSSE3 counterpart of I422ToARGBRow_AVX2; the parameter
   // list is identical (three read-only 8-bit sources, writable dst_argb,
   // YuvConstants pointer, int width). Returns nothing.
   1945 void I422ToARGBRow_SSSE3(const uint8_t* y_buf,
   1946                          const uint8_t* u_buf,
   1947                          const uint8_t* v_buf,
   1948                          uint8_t* dst_argb,
   1949                          const struct YuvConstants* yuvconstants,
   1950                          int width);
   1951 
   // Prototypes of the AR30 / I210 row functions in SSSE3 and AVX2 flavours.
   // The I422* variants read 8-bit sources (const uint8_t*); the I210*
   // variants read 16-bit sources (const uint16_t*). In every case the
   // destination is a uint8_t pointer, followed by a read-only YuvConstants
   // pointer and an int width. Nothing is returned.
   // NOTE(review): "AR30" and "I210" are format names whose bit layouts are
   // not described in this chunk -- see the libyuv format documentation.
   1952 void I422ToAR30Row_SSSE3(const uint8_t* y_buf,
   1953                          const uint8_t* u_buf,
   1954                          const uint8_t* v_buf,
   1955                          uint8_t* dst_ar30,
   1956                          const struct YuvConstants* yuvconstants,
   1957                          int width);
   1958 void I210ToAR30Row_SSSE3(const uint16_t* y_buf,
   1959                          const uint16_t* u_buf,
   1960                          const uint16_t* v_buf,
   1961                          uint8_t* dst_ar30,
   1962                          const struct YuvConstants* yuvconstants,
   1963                          int width);
   1964 void I210ToARGBRow_SSSE3(const uint16_t* y_buf,
   1965                          const uint16_t* u_buf,
   1966                          const uint16_t* v_buf,
   1967                          uint8_t* dst_argb,
   1968                          const struct YuvConstants* yuvconstants,
   1969                          int width);
   1970 void I422ToAR30Row_AVX2(const uint8_t* y_buf,
   1971                         const uint8_t* u_buf,
   1972                         const uint8_t* v_buf,
   1973                         uint8_t* dst_ar30,
   1974                         const struct YuvConstants* yuvconstants,
   1975                         int width);
   1976 void I210ToARGBRow_AVX2(const uint16_t* y_buf,
   1977                         const uint16_t* u_buf,
   1978                         const uint16_t* v_buf,
   1979                         uint8_t* dst_argb,
   1980                         const struct YuvConstants* yuvconstants,
   1981                         int width);
   1982 void I210ToAR30Row_AVX2(const uint16_t* y_buf,
   1983                         const uint16_t* u_buf,
   1984                         const uint16_t* v_buf,
   1985                         uint8_t* dst_ar30,
   1986                         const struct YuvConstants* yuvconstants,
   1987                         int width);
   // Prototypes of the I422AlphaToARGBRow SSSE3 and AVX2 variants. Unlike the
   // plain I422ToARGBRow functions these take a fourth read-only 8-bit source
   // pointer, a_buf, ahead of the destination. Remaining parameters: writable
   // dst_argb, read-only YuvConstants pointer, int width. Returns nothing.
   // NOTE(review): a_buf is presumably a separate alpha plane -- confirm.
   1988 void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf,
   1989                               const uint8_t* u_buf,
   1990                               const uint8_t* v_buf,
   1991                               const uint8_t* a_buf,
   1992                               uint8_t* dst_argb,
   1993                               const struct YuvConstants* yuvconstants,
   1994                               int width);
   1995 void I422AlphaToARGBRow_AVX2(const uint8_t* y_buf,
   1996                              const uint8_t* u_buf,
   1997                              const uint8_t* v_buf,
   1998                              const uint8_t* a_buf,
   1999                              uint8_t* dst_argb,
   2000                              const struct YuvConstants* yuvconstants,
   2001                              int width);
   // Prototypes of the NV12 / NV21 row functions in SSSE3 and AVX2 flavours.
   // Each takes two read-only 8-bit source pointers (a Y pointer plus a
   // second pointer named uv_buf/src_uv for NV12 and vu_buf/src_vu for NV21),
   // a writable destination pointer, a read-only YuvConstants pointer and an
   // int width. Nothing is returned.
   // NOTE(review): the uv/vu naming presumably reflects the byte order of the
   // interleaved chroma data -- confirm against the implementation.
   2002 void NV12ToARGBRow_SSSE3(const uint8_t* y_buf,
   2003                          const uint8_t* uv_buf,
   2004                          uint8_t* dst_argb,
   2005                          const struct YuvConstants* yuvconstants,
   2006                          int width);
   2007 void NV12ToARGBRow_AVX2(const uint8_t* y_buf,
   2008                         const uint8_t* uv_buf,
   2009                         uint8_t* dst_argb,
   2010                         const struct YuvConstants* yuvconstants,
   2011                         int width);
   2012 void NV12ToRGB24Row_SSSE3(const uint8_t* src_y,
   2013                           const uint8_t* src_uv,
   2014                           uint8_t* dst_rgb24,
   2015                           const struct YuvConstants* yuvconstants,
   2016                           int width);
   2017 void NV21ToRGB24Row_SSSE3(const uint8_t* src_y,
   2018                           const uint8_t* src_vu,
   2019                           uint8_t* dst_rgb24,
   2020                           const struct YuvConstants* yuvconstants,
   2021                           int width);
   2022 void NV12ToRGB565Row_SSSE3(const uint8_t* src_y,
   2023                            const uint8_t* src_uv,
   2024                            uint8_t* dst_rgb565,
   2025                            const struct YuvConstants* yuvconstants,
   2026                            int width);
   2027 void NV12ToRGB24Row_AVX2(const uint8_t* src_y,
   2028                          const uint8_t* src_uv,
   2029                          uint8_t* dst_rgb24,
   2030                          const struct YuvConstants* yuvconstants,
   2031                          int width);
   2032 void NV21ToRGB24Row_AVX2(const uint8_t* src_y,
   2033                          const uint8_t* src_vu,
   2034                          uint8_t* dst_rgb24,
   2035                          const struct YuvConstants* yuvconstants,
   2036                          int width);
   2037 void NV12ToRGB565Row_AVX2(const uint8_t* src_y,
   2038                           const uint8_t* src_uv,
   2039                           uint8_t* dst_rgb565,
   2040                           const struct YuvConstants* yuvconstants,
   2041                           int width);
   2042 void NV21ToARGBRow_SSSE3(const uint8_t* y_buf,
   2043                          const uint8_t* vu_buf,
   2044                          uint8_t* dst_argb,
   2045                          const struct YuvConstants* yuvconstants,
   2046                          int width);
   2047 void NV21ToARGBRow_AVX2(const uint8_t* y_buf,
   2048                         const uint8_t* vu_buf,
   2049                         uint8_t* dst_argb,
   2050                         const struct YuvConstants* yuvconstants,
   2051                         int width);
   // Prototypes of the YUY2 / UYVY to ARGB row functions in SSSE3 and AVX2
   // flavours. Each takes a single read-only 8-bit source pointer, a writable
   // dst_argb pointer, a read-only YuvConstants pointer and an int width.
   // Nothing is returned.
   // NOTE(review): a single source pointer suggests the input is one packed
   // buffer rather than separate planes -- confirm against the implementation.
   2052 void YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf,
   2053                          uint8_t* dst_argb,
   2054                          const struct YuvConstants* yuvconstants,
   2055                          int width);
   2056 void UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf,
   2057                          uint8_t* dst_argb,
   2058                          const struct YuvConstants* yuvconstants,
   2059                          int width);
   2060 void YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf,
   2061                         uint8_t* dst_argb,
   2062                         const struct YuvConstants* yuvconstants,
   2063                         int width);
   2064 void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
   2065                         uint8_t* dst_argb,
   2066                         const struct YuvConstants* yuvconstants,
   2067                         int width);
   // Prototypes of the I422 row functions whose destinations are named
   // dst_rgba, dst_argb4444, dst_argb1555, dst_rgb565 and dst_rgb24, in SSSE3
   // and AVX2 flavours (I422ToRGBARow appears here as SSSE3 only; its AVX2
   // prototype is declared earlier in the file). Each takes three read-only
   // 8-bit source pointers, a writable 8-bit destination pointer, a read-only
   // YuvConstants pointer and an int width. Nothing is returned.
   // The source parameters are spelled src_y/src_u/src_v in some prototypes
   // and y_buf/u_buf/v_buf in others; the types and order are the same.
   2068 void I422ToRGBARow_SSSE3(const uint8_t* y_buf,
   2069                          const uint8_t* u_buf,
   2070                          const uint8_t* v_buf,
   2071                          uint8_t* dst_rgba,
   2072                          const struct YuvConstants* yuvconstants,
   2073                          int width);
   2074 void I422ToARGB4444Row_SSSE3(const uint8_t* src_y,
   2075                              const uint8_t* src_u,
   2076                              const uint8_t* src_v,
   2077                              uint8_t* dst_argb4444,
   2078                              const struct YuvConstants* yuvconstants,
   2079                              int width);
   2080 void I422ToARGB4444Row_AVX2(const uint8_t* src_y,
   2081                             const uint8_t* src_u,
   2082                             const uint8_t* src_v,
   2083                             uint8_t* dst_argb4444,
   2084                             const struct YuvConstants* yuvconstants,
   2085                             int width);
   2086 void I422ToARGB1555Row_SSSE3(const uint8_t* src_y,
   2087                              const uint8_t* src_u,
   2088                              const uint8_t* src_v,
   2089                              uint8_t* dst_argb1555,
   2090                              const struct YuvConstants* yuvconstants,
   2091                              int width);
   2092 void I422ToARGB1555Row_AVX2(const uint8_t* src_y,
   2093                             const uint8_t* src_u,
   2094                             const uint8_t* src_v,
   2095                             uint8_t* dst_argb1555,
   2096                             const struct YuvConstants* yuvconstants,
   2097                             int width);
   2098 void I422ToRGB565Row_SSSE3(const uint8_t* src_y,
   2099                            const uint8_t* src_u,
   2100                            const uint8_t* src_v,
   2101                            uint8_t* dst_rgb565,
   2102                            const struct YuvConstants* yuvconstants,
   2103                            int width);
   2104 void I422ToRGB565Row_AVX2(const uint8_t* src_y,
   2105                           const uint8_t* src_u,
   2106                           const uint8_t* src_v,
   2107                           uint8_t* dst_rgb565,
   2108                           const struct YuvConstants* yuvconstants,
   2109                           int width);
   2110 void I422ToRGB24Row_SSSE3(const uint8_t* y_buf,
   2111                           const uint8_t* u_buf,
   2112                           const uint8_t* v_buf,
   2113                           uint8_t* dst_rgb24,
   2114                           const struct YuvConstants* yuvconstants,
   2115                           int width);
   2116 void I422ToRGB24Row_AVX2(const uint8_t* src_y,
   2117                          const uint8_t* src_u,
   2118                          const uint8_t* src_v,
   2119                          uint8_t* dst_rgb24,
   2120                          const struct YuvConstants* yuvconstants,
   2121                          int width);
   // Prototypes of the _Any_ counterparts of the I422/I444 to ARGB/RGBA row
   // functions. Parameter types and order match the plain variants; only the
   // destination is given the generic name dst_ptr. Nothing is returned.
   // NOTE(review): the _Any_ infix presumably marks wrappers that accept any
   // width and hand remainders to a fallback -- this chunk does not show it;
   // confirm against the implementation.
   2122 void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf,
   2123                             const uint8_t* u_buf,
   2124                             const uint8_t* v_buf,
   2125                             uint8_t* dst_ptr,
   2126                             const struct YuvConstants* yuvconstants,
   2127                             int width);
   2128 void I422ToRGBARow_Any_AVX2(const uint8_t* y_buf,
   2129                             const uint8_t* u_buf,
   2130                             const uint8_t* v_buf,
   2131                             uint8_t* dst_ptr,
   2132                             const struct YuvConstants* yuvconstants,
   2133                             int width);
   2134 void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
   2135                              const uint8_t* u_buf,
   2136                              const uint8_t* v_buf,
   2137                              uint8_t* dst_ptr,
   2138                              const struct YuvConstants* yuvconstants,
   2139                              int width);
   2140 void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf,
   2141                             const uint8_t* u_buf,
   2142                             const uint8_t* v_buf,
   2143                             uint8_t* dst_ptr,
   2144                             const struct YuvConstants* yuvconstants,
   2145                             int width);
   2146 void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
   2147                              const uint8_t* u_buf,
   2148                              const uint8_t* v_buf,
   2149                              uint8_t* dst_ptr,
   2150                              const struct YuvConstants* yuvconstants,
   2151                              int width);
   // Prototypes of the _Any_ counterparts of the AR30 / I210 row functions.
   // As with the plain variants, I422* prototypes read const uint8_t* sources
   // and I210* prototypes read const uint16_t* sources; the destination is
   // always uint8_t* dst_ptr, followed by a read-only YuvConstants pointer
   // and an int width. Nothing is returned.
   2152 void I422ToAR30Row_Any_SSSE3(const uint8_t* y_buf,
   2153                              const uint8_t* u_buf,
   2154                              const uint8_t* v_buf,
   2155                              uint8_t* dst_ptr,
   2156                              const struct YuvConstants* yuvconstants,
   2157                              int width);
   2158 void I210ToAR30Row_Any_SSSE3(const uint16_t* y_buf,
   2159                              const uint16_t* u_buf,
   2160                              const uint16_t* v_buf,
   2161                              uint8_t* dst_ptr,
   2162                              const struct YuvConstants* yuvconstants,
   2163                              int width);
   2164 void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf,
   2165                              const uint16_t* u_buf,
   2166                              const uint16_t* v_buf,
   2167                              uint8_t* dst_ptr,
   2168                              const struct YuvConstants* yuvconstants,
   2169                              int width);
   2170 void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf,
   2171                             const uint8_t* u_buf,
   2172                             const uint8_t* v_buf,
   2173                             uint8_t* dst_ptr,
   2174                             const struct YuvConstants* yuvconstants,
   2175                             int width);
   2176 void I210ToARGBRow_Any_AVX2(const uint16_t* y_buf,
   2177                             const uint16_t* u_buf,
   2178                             const uint16_t* v_buf,
   2179                             uint8_t* dst_ptr,
   2180                             const struct YuvConstants* yuvconstants,
   2181                             int width);
   2182 void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf,
   2183                             const uint16_t* u_buf,
   2184                             const uint16_t* v_buf,
   2185                             uint8_t* dst_ptr,
   2186                             const struct YuvConstants* yuvconstants,
   2187                             int width);
   // Prototypes of the _Any_ counterparts of I422AlphaToARGBRow. They take
   // four read-only 8-bit source pointers (y_buf, u_buf, v_buf, a_buf), a
   // writable dst_ptr, a read-only YuvConstants pointer and an int width.
   // Nothing is returned.
   2188 void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf,
   2189                                   const uint8_t* u_buf,
   2190                                   const uint8_t* v_buf,
   2191                                   const uint8_t* a_buf,
   2192                                   uint8_t* dst_ptr,
   2193                                   const struct YuvConstants* yuvconstants,
   2194                                   int width);
   2195 void I422AlphaToARGBRow_Any_AVX2(const uint8_t* y_buf,
   2196                                  const uint8_t* u_buf,
   2197                                  const uint8_t* v_buf,
   2198                                  const uint8_t* a_buf,
   2199                                  uint8_t* dst_ptr,
   2200                                  const struct YuvConstants* yuvconstants,
   2201                                  int width);
   // Prototypes of the _Any_ counterparts of the NV12 / NV21 row functions.
   // Each takes two read-only 8-bit source pointers, a writable dst_ptr, a
   // read-only YuvConstants pointer and an int width. Nothing is returned.
   // NOTE(review): the second source is named uv_buf even in the NV21
   // prototypes, whereas the plain NV21 variants call it vu_buf / src_vu.
   // The generic name says nothing about byte order here.
   2202 void NV12ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
   2203                              const uint8_t* uv_buf,
   2204                              uint8_t* dst_ptr,
   2205                              const struct YuvConstants* yuvconstants,
   2206                              int width);
   2207 void NV12ToARGBRow_Any_AVX2(const uint8_t* y_buf,
   2208                             const uint8_t* uv_buf,
   2209                             uint8_t* dst_ptr,
   2210                             const struct YuvConstants* yuvconstants,
   2211                             int width);
   2212 void NV21ToARGBRow_Any_SSSE3(const uint8_t* y_buf,
   2213                              const uint8_t* uv_buf,
   2214                              uint8_t* dst_ptr,
   2215                              const struct YuvConstants* yuvconstants,
   2216                              int width);
   2217 void NV21ToARGBRow_Any_AVX2(const uint8_t* y_buf,
   2218                             const uint8_t* uv_buf,
   2219                             uint8_t* dst_ptr,
   2220                             const struct YuvConstants* yuvconstants,
   2221                             int width);
   2222 void NV12ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
   2223                               const uint8_t* uv_buf,
   2224                               uint8_t* dst_ptr,
   2225                               const struct YuvConstants* yuvconstants,
   2226                               int width);
   2227 void NV21ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
   2228                               const uint8_t* uv_buf,
   2229                               uint8_t* dst_ptr,
   2230                               const struct YuvConstants* yuvconstants,
   2231                               int width);
   2232 void NV12ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
   2233                              const uint8_t* uv_buf,
   2234                              uint8_t* dst_ptr,
   2235                              const struct YuvConstants* yuvconstants,
   2236                              int width);
   2237 void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
   2238                              const uint8_t* uv_buf,
   2239                              uint8_t* dst_ptr,
   2240                              const struct YuvConstants* yuvconstants,
   2241                              int width);
   2242 void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
   2243                                const uint8_t* uv_buf,
   2244                                uint8_t* dst_ptr,
   2245                                const struct YuvConstants* yuvconstants,
   2246                                int width);
   2247 void NV12ToRGB565Row_Any_AVX2(const uint8_t* y_buf,
   2248                               const uint8_t* uv_buf,
   2249                               uint8_t* dst_ptr,
   2250                               const struct YuvConstants* yuvconstants,
   2251                               int width);
   // Prototypes of the _Any_ counterparts of the YUY2 / UYVY to ARGB row
   // functions. Each takes one read-only 8-bit source pointer (src_ptr), a
   // writable dst_ptr, a read-only YuvConstants pointer and an int width.
   // Nothing is returned.
   2252 void YUY2ToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
   2253                              uint8_t* dst_ptr,
   2254                              const struct YuvConstants* yuvconstants,
   2255                              int width);
   2256 void UYVYToARGBRow_Any_SSSE3(const uint8_t* src_ptr,
   2257                              uint8_t* dst_ptr,
   2258                              const struct YuvConstants* yuvconstants,
   2259                              int width);
   2260 void YUY2ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
   2261                             uint8_t* dst_ptr,
   2262                             const struct YuvConstants* yuvconstants,
   2263                             int width);
   2264 void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr,
   2265                             uint8_t* dst_ptr,
   2266                             const struct YuvConstants* yuvconstants,
   2267                             int width);
   // Prototypes of the _Any_ counterparts of the I422 to RGBA / ARGB4444 /
   // ARGB1555 / RGB565 / RGB24 row functions. All share one parameter list:
   // three read-only 8-bit source pointers (y_buf, u_buf, v_buf), a writable
   // dst_ptr, a read-only YuvConstants pointer and an int width. Nothing is
   // returned.
   2268 void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf,
   2269                              const uint8_t* u_buf,
   2270                              const uint8_t* v_buf,
   2271                              uint8_t* dst_ptr,
   2272                              const struct YuvConstants* yuvconstants,
   2273                              int width);
   2274 void I422ToARGB4444Row_Any_SSSE3(const uint8_t* y_buf,
   2275                                  const uint8_t* u_buf,
   2276                                  const uint8_t* v_buf,
   2277                                  uint8_t* dst_ptr,
   2278                                  const struct YuvConstants* yuvconstants,
   2279                                  int width);
   2280 void I422ToARGB4444Row_Any_AVX2(const uint8_t* y_buf,
   2281                                 const uint8_t* u_buf,
   2282                                 const uint8_t* v_buf,
   2283                                 uint8_t* dst_ptr,
   2284                                 const struct YuvConstants* yuvconstants,
   2285                                 int width);
   2286 void I422ToARGB1555Row_Any_SSSE3(const uint8_t* y_buf,
   2287                                  const uint8_t* u_buf,
   2288                                  const uint8_t* v_buf,
   2289                                  uint8_t* dst_ptr,
   2290                                  const struct YuvConstants* yuvconstants,
   2291                                  int width);
   2292 void I422ToARGB1555Row_Any_AVX2(const uint8_t* y_buf,
   2293                                 const uint8_t* u_buf,
   2294                                 const uint8_t* v_buf,
   2295                                 uint8_t* dst_ptr,
   2296                                 const struct YuvConstants* yuvconstants,
   2297                                 int width);
   2298 void I422ToRGB565Row_Any_SSSE3(const uint8_t* y_buf,
   2299                                const uint8_t* u_buf,
   2300                                const uint8_t* v_buf,
   2301                                uint8_t* dst_ptr,
   2302                                const struct YuvConstants* yuvconstants,
   2303                                int width);
   2304 void I422ToRGB565Row_Any_AVX2(const uint8_t* y_buf,
   2305                               const uint8_t* u_buf,
   2306                               const uint8_t* v_buf,
   2307                               uint8_t* dst_ptr,
   2308                               const struct YuvConstants* yuvconstants,
   2309                               int width);
   2310 void I422ToRGB24Row_Any_SSSE3(const uint8_t* y_buf,
   2311                               const uint8_t* u_buf,
   2312                               const uint8_t* v_buf,
   2313                               uint8_t* dst_ptr,
   2314                               const struct YuvConstants* yuvconstants,
   2315                               int width);
   2316 void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf,
   2317                              const uint8_t* u_buf,
   2318                              const uint8_t* v_buf,
   2319                              uint8_t* dst_ptr,
   2320                              const struct YuvConstants* yuvconstants,
   2321                              int width);
   2322 
   // Prototypes of the I400ToARGBRow family: _C, _SSE2, _AVX2, _NEON and _MSA
   // variants plus an _Any_ wrapper for each SIMD flavour. Every prototype
   // takes one read-only 8-bit source pointer, one writable 8-bit destination
   // pointer and an int width; unlike the YUV prototypes above there is no
   // YuvConstants parameter. Nothing is returned.
   // The destination is called rgb_buf in the _C prototype and dst_argb /
   // dst_ptr in the others; the type is the same.
   2323 void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width);
   2324 void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
   2325 void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width);
   2326 void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width);
   2327 void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width);
   2328 void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr,
   2329                             uint8_t* dst_ptr,
   2330                             int width);
   2331 void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr,
   2332                             uint8_t* dst_ptr,
   2333                             int width);
   2334 void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr,
   2335                             uint8_t* dst_ptr,
   2336                             int width);
   2337 void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2338 
   2339 // ARGB preattenuated alpha blend.
   // Prototypes for the SSSE3, NEON, MSA and C variants. Each takes two
   // read-only 8-bit source pointers (src_argb0, src_argb1), a writable
   // dst_argb pointer and an int width, and returns nothing. No _Any_
   // wrappers are declared for this family in this part of the header.
   // NOTE(review): which source acts as foreground is not visible here --
   // confirm against the implementation.
   2340 void ARGBBlendRow_SSSE3(const uint8_t* src_argb0,
   2341                         const uint8_t* src_argb1,
   2342                         uint8_t* dst_argb,
   2343                         int width);
   2344 void ARGBBlendRow_NEON(const uint8_t* src_argb0,
   2345                        const uint8_t* src_argb1,
   2346                        uint8_t* dst_argb,
   2347                        int width);
   2348 void ARGBBlendRow_MSA(const uint8_t* src_argb0,
   2349                       const uint8_t* src_argb1,
   2350                       uint8_t* dst_argb,
   2351                       int width);
   2352 void ARGBBlendRow_C(const uint8_t* src_argb0,
   2353                     const uint8_t* src_argb1,
   2354                     uint8_t* dst_argb,
   2355                     int width);
   2356 
   2357 // Unattenuated planar alpha blend.
   // Prototypes for the SSSE3, AVX2 and C variants plus _Any_ wrappers for the
   // two SIMD flavours. The plain variants take three read-only 8-bit
   // pointers (src0, src1, alpha), a writable dst pointer and an int width;
   // nothing is returned.
   // NOTE(review): the _Any_ prototypes name their sources y_buf, u_buf and
   // v_buf. The types and order match src0, src1 and alpha, so the names look
   // like leftovers from a generic three-source wrapper -- confirm.
   2358 void BlendPlaneRow_SSSE3(const uint8_t* src0,
   2359                          const uint8_t* src1,
   2360                          const uint8_t* alpha,
   2361                          uint8_t* dst,
   2362                          int width);
   2363 void BlendPlaneRow_Any_SSSE3(const uint8_t* y_buf,
   2364                              const uint8_t* u_buf,
   2365                              const uint8_t* v_buf,
   2366                              uint8_t* dst_ptr,
   2367                              int width);
   2368 void BlendPlaneRow_AVX2(const uint8_t* src0,
   2369                         const uint8_t* src1,
   2370                         const uint8_t* alpha,
   2371                         uint8_t* dst,
   2372                         int width);
   2373 void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf,
   2374                             const uint8_t* u_buf,
   2375                             const uint8_t* v_buf,
   2376                             uint8_t* dst_ptr,
   2377                             int width);
   2378 void BlendPlaneRow_C(const uint8_t* src0,
   2379                      const uint8_t* src1,
   2380                      const uint8_t* alpha,
   2381                      uint8_t* dst,
   2382                      int width);
   2383 
   2384 // ARGB multiply images. Same API as Blend, but these require
   2385 // pointer and width alignment for SSE2.
   // Prototypes for the C, SSE2, AVX2, NEON and MSA variants, each SIMD
   // flavour paired with an _Any_ wrapper. The plain variants take two
   // read-only 8-bit source pointers (src_argb0, src_argb1), a writable
   // dst_argb pointer and an int width; nothing is returned.
   // NOTE(review): the _Any_ prototypes name their sources y_buf and uv_buf.
   // The types and order match src_argb0 and src_argb1, so the names look
   // like leftovers from a generic two-source wrapper -- confirm.
   2386 void ARGBMultiplyRow_C(const uint8_t* src_argb0,
   2387                        const uint8_t* src_argb1,
   2388                        uint8_t* dst_argb,
   2389                        int width);
   2390 void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0,
   2391                           const uint8_t* src_argb1,
   2392                           uint8_t* dst_argb,
   2393                           int width);
   2394 void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf,
   2395                               const uint8_t* uv_buf,
   2396                               uint8_t* dst_ptr,
   2397                               int width);
   2398 void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0,
   2399                           const uint8_t* src_argb1,
   2400                           uint8_t* dst_argb,
   2401                           int width);
   2402 void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf,
   2403                               const uint8_t* uv_buf,
   2404                               uint8_t* dst_ptr,
   2405                               int width);
   2406 void ARGBMultiplyRow_NEON(const uint8_t* src_argb0,
   2407                           const uint8_t* src_argb1,
   2408                           uint8_t* dst_argb,
   2409                           int width);
   2410 void ARGBMultiplyRow_Any_NEON(const uint8_t* y_buf,
   2411                               const uint8_t* uv_buf,
   2412                               uint8_t* dst_ptr,
   2413                               int width);
   2414 void ARGBMultiplyRow_MSA(const uint8_t* src_argb0,
   2415                          const uint8_t* src_argb1,
   2416                          uint8_t* dst_argb,
   2417                          int width);
   2418 void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf,
   2419                              const uint8_t* uv_buf,
   2420                              uint8_t* dst_ptr,
   2421                              int width);
   2422 
   2423 // ARGB add images.
   // Prototypes for the C, SSE2, AVX2, NEON and MSA variants, each SIMD
   // flavour paired with an _Any_ wrapper. The plain variants take two
   // read-only 8-bit source pointers (src_argb0, src_argb1), a writable
   // dst_argb pointer and an int width; nothing is returned.
   // NOTE(review): overflow handling (wrap vs. saturate) is not visible from
   // the prototypes -- confirm against the implementation.
   // NOTE(review): the _Any_ prototypes name their sources y_buf and uv_buf;
   // the types and order match src_argb0 and src_argb1.
   2424 void ARGBAddRow_C(const uint8_t* src_argb0,
   2425                   const uint8_t* src_argb1,
   2426                   uint8_t* dst_argb,
   2427                   int width);
   2428 void ARGBAddRow_SSE2(const uint8_t* src_argb0,
   2429                      const uint8_t* src_argb1,
   2430                      uint8_t* dst_argb,
   2431                      int width);
   2432 void ARGBAddRow_Any_SSE2(const uint8_t* y_buf,
   2433                          const uint8_t* uv_buf,
   2434                          uint8_t* dst_ptr,
   2435                          int width);
   2436 void ARGBAddRow_AVX2(const uint8_t* src_argb0,
   2437                      const uint8_t* src_argb1,
   2438                      uint8_t* dst_argb,
   2439                      int width);
   2440 void ARGBAddRow_Any_AVX2(const uint8_t* y_buf,
   2441                          const uint8_t* uv_buf,
   2442                          uint8_t* dst_ptr,
   2443                          int width);
   2444 void ARGBAddRow_NEON(const uint8_t* src_argb0,
   2445                      const uint8_t* src_argb1,
   2446                      uint8_t* dst_argb,
   2447                      int width);
   2448 void ARGBAddRow_Any_NEON(const uint8_t* y_buf,
   2449                          const uint8_t* uv_buf,
   2450                          uint8_t* dst_ptr,
   2451                          int width);
   2452 void ARGBAddRow_MSA(const uint8_t* src_argb0,
   2453                     const uint8_t* src_argb1,
   2454                     uint8_t* dst_argb,
   2455                     int width);
   2456 void ARGBAddRow_Any_MSA(const uint8_t* y_buf,
   2457                         const uint8_t* uv_buf,
   2458                         uint8_t* dst_ptr,
   2459                         int width);
   2460 
   2461 // ARGB subtract images. Same API as Blend, but these require
   2462 // pointer and width alignment for SSE2.
   // Prototypes for the C, SSE2, AVX2, NEON and MSA variants, each SIMD
   // flavour paired with an _Any_ wrapper. The plain variants take two
   // read-only 8-bit source pointers (src_argb0, src_argb1), a writable
   // dst_argb pointer and an int width; nothing is returned.
   // NOTE(review): the operand order of the subtraction and its underflow
   // handling are not visible from the prototypes -- confirm against the
   // implementation.
   // NOTE(review): the _Any_ prototypes name their sources y_buf and uv_buf;
   // the types and order match src_argb0 and src_argb1.
   2463 void ARGBSubtractRow_C(const uint8_t* src_argb0,
   2464                        const uint8_t* src_argb1,
   2465                        uint8_t* dst_argb,
   2466                        int width);
   2467 void ARGBSubtractRow_SSE2(const uint8_t* src_argb0,
   2468                           const uint8_t* src_argb1,
   2469                           uint8_t* dst_argb,
   2470                           int width);
   2471 void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf,
   2472                               const uint8_t* uv_buf,
   2473                               uint8_t* dst_ptr,
   2474                               int width);
   2475 void ARGBSubtractRow_AVX2(const uint8_t* src_argb0,
   2476                           const uint8_t* src_argb1,
   2477                           uint8_t* dst_argb,
   2478                           int width);
   2479 void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf,
   2480                               const uint8_t* uv_buf,
   2481                               uint8_t* dst_ptr,
   2482                               int width);
   2483 void ARGBSubtractRow_NEON(const uint8_t* src_argb0,
   2484                           const uint8_t* src_argb1,
   2485                           uint8_t* dst_argb,
   2486                           int width);
   2487 void ARGBSubtractRow_Any_NEON(const uint8_t* y_buf,
   2488                               const uint8_t* uv_buf,
   2489                               uint8_t* dst_ptr,
   2490                               int width);
   2491 void ARGBSubtractRow_MSA(const uint8_t* src_argb0,
   2492                          const uint8_t* src_argb1,
   2493                          uint8_t* dst_argb,
   2494                          int width);
   2495 void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf,
   2496                              const uint8_t* uv_buf,
   2497                              uint8_t* dst_ptr,
   2498                              int width);
   2499 
   // "Any" prototypes for the x86 backends (SSSE3, SSE2, AVX2, AVX512VBMI) of
   // the ARGBTo<format>Row / ABGRToAR30Row conversions. Each takes a const
   // source row, a destination row and a width; the ...DitherRow prototypes
   // take one extra const uint32_t argument named param.
   // NOTE(review): param is presumably the dither pattern and width is
   // presumably in pixels - confirm against the definitions.
   2500 void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr,
   2501                               uint8_t* dst_ptr,
   2502                               int width);
   2503 void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_ptr,
   2504                             uint8_t* dst_ptr,
   2505                             int width);
   2506 void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_ptr,
   2507                               uint8_t* dst_ptr,
   2508                               int width);
   2509 void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_ptr,
   2510                                 uint8_t* dst_ptr,
   2511                                 int width);
   2512 void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_ptr,
   2513                                 uint8_t* dst_ptr,
   2514                                 int width);
   2515 void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr,
   2516                              uint8_t* dst_ptr,
   2517                              int width);
   2518 void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_ptr,
   2519                              uint8_t* dst_ptr,
   2520                              int width);
   2521 void ARGBToRAWRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2522 void ARGBToRGB24Row_Any_AVX2(const uint8_t* src_ptr,
   2523                              uint8_t* dst_ptr,
   2524                              int width);
   2525 void ARGBToRGB24Row_Any_AVX512VBMI(const uint8_t* src_ptr,
   2526                                    uint8_t* dst_ptr,
   2527                                    int width);
   2528 void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr,
   2529                                     uint8_t* dst_ptr,
   2530                                     const uint32_t param,
   2531                                     int width);
   2532 void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_ptr,
   2533                                     uint8_t* dst_ptr,
   2534                                     const uint32_t param,
   2535                                     int width);
   2536 
   // AVX2 "Any" prototypes for the RGB565, ARGB1555, ARGB4444 and AR30
   // conversions. All share the (src_ptr, dst_ptr, width) parameter list.
   2537 void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_ptr,
   2538                               uint8_t* dst_ptr,
   2539                               int width);
   2540 void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_ptr,
   2541                                 uint8_t* dst_ptr,
   2542                                 int width);
   2543 void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_ptr,
   2544                                 uint8_t* dst_ptr,
   2545                                 int width);
   2546 void ABGRToAR30Row_Any_AVX2(const uint8_t* src_ptr,
   2547                             uint8_t* dst_ptr,
   2548                             int width);
   2549 void ARGBToAR30Row_Any_AVX2(const uint8_t* src_ptr,
   2550                             uint8_t* dst_ptr,
   2551                             int width);
   2552 
   // NEON and MSA "Any" prototypes for the ARGBTo<format>Row conversions
   // (RGB24, RAW, RGB565, ARGB1555, ARGB4444, RGB565Dither). The two backends
   // declare the same set of six functions with matching parameter lists; the
   // ...DitherRow prototypes take one extra const uint32_t argument named param.
   // NOTE(review): param is presumably the dither pattern - confirm.
   2553 void ARGBToRGB24Row_Any_NEON(const uint8_t* src_ptr,
   2554                              uint8_t* dst_ptr,
   2555                              int width);
   2556 void ARGBToRAWRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2557 void ARGBToRGB565Row_Any_NEON(const uint8_t* src_ptr,
   2558                               uint8_t* dst_ptr,
   2559                               int width);
   2560 void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_ptr,
   2561                                 uint8_t* dst_ptr,
   2562                                 int width);
   2563 void ARGBToARGB4444Row_Any_NEON(const uint8_t* src_ptr,
   2564                                 uint8_t* dst_ptr,
   2565                                 int width);
   2566 void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_ptr,
   2567                                     uint8_t* dst_ptr,
   2568                                     const uint32_t param,
   2569                                     int width);
   2570 void ARGBToRGB24Row_Any_MSA(const uint8_t* src_ptr,
   2571                             uint8_t* dst_ptr,
   2572                             int width);
   2573 void ARGBToRAWRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2574 void ARGBToRGB565Row_Any_MSA(const uint8_t* src_ptr,
   2575                              uint8_t* dst_ptr,
   2576                              int width);
   2577 void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_ptr,
   2578                                uint8_t* dst_ptr,
   2579                                int width);
   2580 void ARGBToARGB4444Row_Any_MSA(const uint8_t* src_ptr,
   2581                                uint8_t* dst_ptr,
   2582                                int width);
   2583 void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr,
   2584                                    uint8_t* dst_ptr,
   2585                                    const uint32_t param,
   2586                                    int width);
   2587 
   // NEON "Any" prototypes for YUV-to-RGB row conversions. Parameter shapes:
   //   I444/I422 sources: separate y_buf, u_buf, v_buf rows
   //                      (I422Alpha adds a_buf);
   //   NV12/NV21 sources: y_buf plus a single uv_buf row;
   //   YUY2/UYVY sources: a single src_ptr row.
   // Every prototype also takes the destination row, a pointer to a
   // struct YuvConstants and a width.
   // NOTE(review): yuvconstants presumably selects the colour-conversion
   // coefficients and width is presumably in pixels - confirm.
   2588 void I444ToARGBRow_Any_NEON(const uint8_t* y_buf,
   2589                             const uint8_t* u_buf,
   2590                             const uint8_t* v_buf,
   2591                             uint8_t* dst_ptr,
   2592                             const struct YuvConstants* yuvconstants,
   2593                             int width);
   2594 void I422ToARGBRow_Any_NEON(const uint8_t* y_buf,
   2595                             const uint8_t* u_buf,
   2596                             const uint8_t* v_buf,
   2597                             uint8_t* dst_ptr,
   2598                             const struct YuvConstants* yuvconstants,
   2599                             int width);
   2600 void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf,
   2601                                  const uint8_t* u_buf,
   2602                                  const uint8_t* v_buf,
   2603                                  const uint8_t* a_buf,
   2604                                  uint8_t* dst_ptr,
   2605                                  const struct YuvConstants* yuvconstants,
   2606                                  int width);
   2607 void I422ToRGBARow_Any_NEON(const uint8_t* y_buf,
   2608                             const uint8_t* u_buf,
   2609                             const uint8_t* v_buf,
   2610                             uint8_t* dst_ptr,
   2611                             const struct YuvConstants* yuvconstants,
   2612                             int width);
   2613 void I422ToRGB24Row_Any_NEON(const uint8_t* y_buf,
   2614                              const uint8_t* u_buf,
   2615                              const uint8_t* v_buf,
   2616                              uint8_t* dst_ptr,
   2617                              const struct YuvConstants* yuvconstants,
   2618                              int width);
   2619 void I422ToARGB4444Row_Any_NEON(const uint8_t* y_buf,
   2620                                 const uint8_t* u_buf,
   2621                                 const uint8_t* v_buf,
   2622                                 uint8_t* dst_ptr,
   2623                                 const struct YuvConstants* yuvconstants,
   2624                                 int width);
   2625 void I422ToARGB1555Row_Any_NEON(const uint8_t* y_buf,
   2626                                 const uint8_t* u_buf,
   2627                                 const uint8_t* v_buf,
   2628                                 uint8_t* dst_ptr,
   2629                                 const struct YuvConstants* yuvconstants,
   2630                                 int width);
   2631 void I422ToRGB565Row_Any_NEON(const uint8_t* y_buf,
   2632                               const uint8_t* u_buf,
   2633                               const uint8_t* v_buf,
   2634                               uint8_t* dst_ptr,
   2635                               const struct YuvConstants* yuvconstants,
   2636                               int width);
   2637 void NV12ToARGBRow_Any_NEON(const uint8_t* y_buf,
   2638                             const uint8_t* uv_buf,
   2639                             uint8_t* dst_ptr,
   2640                             const struct YuvConstants* yuvconstants,
   2641                             int width);
   2642 void NV21ToARGBRow_Any_NEON(const uint8_t* y_buf,
   2643                             const uint8_t* uv_buf,
   2644                             uint8_t* dst_ptr,
   2645                             const struct YuvConstants* yuvconstants,
   2646                             int width);
   2647 void NV12ToRGB24Row_Any_NEON(const uint8_t* y_buf,
   2648                              const uint8_t* uv_buf,
   2649                              uint8_t* dst_ptr,
   2650                              const struct YuvConstants* yuvconstants,
   2651                              int width);
   2652 void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf,
   2653                              const uint8_t* uv_buf,
   2654                              uint8_t* dst_ptr,
   2655                              const struct YuvConstants* yuvconstants,
   2656                              int width);
   2657 void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf,
   2658                               const uint8_t* uv_buf,
   2659                               uint8_t* dst_ptr,
   2660                               const struct YuvConstants* yuvconstants,
   2661                               int width);
   2662 void YUY2ToARGBRow_Any_NEON(const uint8_t* src_ptr,
   2663                             uint8_t* dst_ptr,
   2664                             const struct YuvConstants* yuvconstants,
   2665                             int width);
   2666 void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr,
   2667                             uint8_t* dst_ptr,
   2668                             const struct YuvConstants* yuvconstants,
   2669                             int width);
   // MSA "Any" prototypes for YUV-to-RGB row conversions. Parameter shapes:
   //   I444/I422 sources: separate y_buf, u_buf, v_buf rows
   //                      (I422Alpha adds a_buf);
   //   NV12/NV21 sources: y_buf plus a single uv_buf row;
   //   YUY2/UYVY sources: a single src_ptr row.
   // Every prototype also takes the destination row, a pointer to a
   // struct YuvConstants and a width.
   // NOTE(review): yuvconstants presumably selects the colour-conversion
   // coefficients and width is presumably in pixels - confirm.
   2670 void I444ToARGBRow_Any_MSA(const uint8_t* y_buf,
   2671                            const uint8_t* u_buf,
   2672                            const uint8_t* v_buf,
   2673                            uint8_t* dst_ptr,
   2674                            const struct YuvConstants* yuvconstants,
   2675                            int width);
   2676 void I422ToARGBRow_Any_MSA(const uint8_t* y_buf,
   2677                            const uint8_t* u_buf,
   2678                            const uint8_t* v_buf,
   2679                            uint8_t* dst_ptr,
   2680                            const struct YuvConstants* yuvconstants,
   2681                            int width);
   2682 void I422ToRGBARow_Any_MSA(const uint8_t* y_buf,
   2683                            const uint8_t* u_buf,
   2684                            const uint8_t* v_buf,
   2685                            uint8_t* dst_ptr,
   2686                            const struct YuvConstants* yuvconstants,
   2687                            int width);
   2688 void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf,
   2689                                 const uint8_t* u_buf,
   2690                                 const uint8_t* v_buf,
   2691                                 const uint8_t* a_buf,
   2692                                 uint8_t* dst_ptr,
   2693                                 const struct YuvConstants* yuvconstants,
   2694                                 int width);
   2695 void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf,
   2696                             const uint8_t* u_buf,
   2697                             const uint8_t* v_buf,
   2698                             uint8_t* dst_ptr,
   2699                             const struct YuvConstants* yuvconstants,
   2700                             int width);
   2701 void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf,
   2702                              const uint8_t* u_buf,
   2703                              const uint8_t* v_buf,
   2704                              uint8_t* dst_ptr,
   2705                              const struct YuvConstants* yuvconstants,
   2706                              int width);
   2707 void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf,
   2708                                const uint8_t* u_buf,
   2709                                const uint8_t* v_buf,
   2710                                uint8_t* dst_ptr,
   2711                                const struct YuvConstants* yuvconstants,
   2712                                int width);
   2713 void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf,
   2714                                const uint8_t* u_buf,
   2715                                const uint8_t* v_buf,
   2716                                uint8_t* dst_ptr,
   2717                                const struct YuvConstants* yuvconstants,
   2718                                int width);
   2719 void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf,
   2720                            const uint8_t* uv_buf,
   2721                            uint8_t* dst_ptr,
   2722                            const struct YuvConstants* yuvconstants,
   2723                            int width);
   2724 void NV12ToRGB565Row_Any_MSA(const uint8_t* y_buf,
   2725                              const uint8_t* uv_buf,
   2726                              uint8_t* dst_ptr,
   2727                              const struct YuvConstants* yuvconstants,
   2728                              int width);
   2729 void NV21ToARGBRow_Any_MSA(const uint8_t* y_buf,
   2730                            const uint8_t* uv_buf,
   2731                            uint8_t* dst_ptr,
   2732                            const struct YuvConstants* yuvconstants,
   2733                            int width);
   2734 void YUY2ToARGBRow_Any_MSA(const uint8_t* src_ptr,
   2735                            uint8_t* dst_ptr,
   2736                            const struct YuvConstants* yuvconstants,
   2737                            int width);
   2738 void UYVYToARGBRow_Any_MSA(const uint8_t* src_ptr,
   2739                            uint8_t* dst_ptr,
   2740                            const struct YuvConstants* yuvconstants,
   2741                            int width);
   2742 
   // YUY2 row prototypes for the AVX2, SSE2, NEON, MSA and C backends. Each
   // backend declares three functions:
   //   YUY2ToYRow     - (src_yuy2, dst_y, width)
   //   YUY2ToUVRow    - (src_yuy2, stride, dst_u, dst_v, width)
   //   YUY2ToUV422Row - (src_yuy2, dst_u, dst_v, width), no stride argument
   // The stride parameter is spelled stride_yuy2 in the AVX2/SSE2/NEON
   // prototypes and src_stride_yuy2 in the MSA/C prototypes.
   // NOTE(review): YUY2ToUVRow presumably uses the stride to read a second
   // source row for vertical chroma subsampling - confirm against the
   // definitions.
   2743 void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
   2744 void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2,
   2745                       int stride_yuy2,
   2746                       uint8_t* dst_u,
   2747                       uint8_t* dst_v,
   2748                       int width);
   2749 void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2,
   2750                          uint8_t* dst_u,
   2751                          uint8_t* dst_v,
   2752                          int width);
   2753 void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
   2754 void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2,
   2755                       int stride_yuy2,
   2756                       uint8_t* dst_u,
   2757                       uint8_t* dst_v,
   2758                       int width);
   2759 void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2,
   2760                          uint8_t* dst_u,
   2761                          uint8_t* dst_v,
   2762                          int width);
   2763 void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
   2764 void YUY2ToUVRow_NEON(const uint8_t* src_yuy2,
   2765                       int stride_yuy2,
   2766                       uint8_t* dst_u,
   2767                       uint8_t* dst_v,
   2768                       int width);
   2769 void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2,
   2770                          uint8_t* dst_u,
   2771                          uint8_t* dst_v,
   2772                          int width);
   2773 void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
   2774 void YUY2ToUVRow_MSA(const uint8_t* src_yuy2,
   2775                      int src_stride_yuy2,
   2776                      uint8_t* dst_u,
   2777                      uint8_t* dst_v,
   2778                      int width);
   2779 void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2,
   2780                         uint8_t* dst_u,
   2781                         uint8_t* dst_v,
   2782                         int width);
   2783 void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width);
   2784 void YUY2ToUVRow_C(const uint8_t* src_yuy2,
   2785                    int src_stride_yuy2,
   2786                    uint8_t* dst_u,
   2787                    uint8_t* dst_v,
   2788                    int width);
   2789 void YUY2ToUV422Row_C(const uint8_t* src_yuy2,
   2790                       uint8_t* dst_u,
   2791                       uint8_t* dst_v,
   2792                       int width);
   // "Any" prototypes of the YUY2 row functions for the AVX2, SSE2, NEON and
   // MSA backends. Parameter lists have the same shape as the non-Any
   // prototypes, with the source named src_ptr and the stride src_stride_ptr.
   // NOTE(review): the _Any_ variants presumably accept widths that are not a
   // multiple of the SIMD step - confirm against their definitions.
   2793 void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2794 void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr,
   2795                           int src_stride_ptr,
   2796                           uint8_t* dst_u,
   2797                           uint8_t* dst_v,
   2798                           int width);
   2799 void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr,
   2800                              uint8_t* dst_u,
   2801                              uint8_t* dst_v,
   2802                              int width);
   2803 void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2804 void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr,
   2805                           int src_stride_ptr,
   2806                           uint8_t* dst_u,
   2807                           uint8_t* dst_v,
   2808                           int width);
   2809 void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr,
   2810                              uint8_t* dst_u,
   2811                              uint8_t* dst_v,
   2812                              int width);
   2813 void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2814 void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr,
   2815                           int src_stride_ptr,
   2816                           uint8_t* dst_u,
   2817                           uint8_t* dst_v,
   2818                           int width);
   2819 void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr,
   2820                              uint8_t* dst_u,
   2821                              uint8_t* dst_v,
   2822                              int width);
   2823 void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2824 void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr,
   2825                          int src_stride_ptr,
   2826                          uint8_t* dst_u,
   2827                          uint8_t* dst_v,
   2828                          int width);
   2829 void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr,
   2830                             uint8_t* dst_u,
   2831                             uint8_t* dst_v,
   2832                             int width);
   // UYVY row prototypes for the AVX2, SSE2, NEON and MSA backends. Each
   // backend declares three functions:
   //   UYVYToYRow     - (src_uyvy, dst_y, width)
   //   UYVYToUVRow    - (src_uyvy, stride, dst_u, dst_v, width)
   //   UYVYToUV422Row - (src_uyvy, dst_u, dst_v, width), no stride argument
   // The stride parameter is spelled stride_uyvy in the AVX2/SSE2/NEON
   // prototypes and src_stride_uyvy in the MSA prototype.
   // NOTE(review): UYVYToUVRow presumably uses the stride to read a second
   // source row for vertical chroma subsampling - confirm against the
   // definitions.
   2833 void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
   2834 void UYVYToUVRow_AVX2(const uint8_t* src_uyvy,
   2835                       int stride_uyvy,
   2836                       uint8_t* dst_u,
   2837                       uint8_t* dst_v,
   2838                       int width);
   2839 void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy,
   2840                          uint8_t* dst_u,
   2841                          uint8_t* dst_v,
   2842                          int width);
   2843 void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
   2844 void UYVYToUVRow_SSE2(const uint8_t* src_uyvy,
   2845                       int stride_uyvy,
   2846                       uint8_t* dst_u,
   2847                       uint8_t* dst_v,
   2848                       int width);
   2849 void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy,
   2850                          uint8_t* dst_u,
   2851                          uint8_t* dst_v,
   2852                          int width);
   2863 void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
   2864 void UYVYToUVRow_NEON(const uint8_t* src_uyvy,
   2865                       int stride_uyvy,
   2866                       uint8_t* dst_u,
   2867                       uint8_t* dst_v,
   2868                       int width);
   2869 void UYVYToUV422Row_NEON(const uint8_t* src_uyvy,
   2870                          uint8_t* dst_u,
   2871                          uint8_t* dst_v,
   2872                          int width);
   2873 void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
   2874 void UYVYToUVRow_MSA(const uint8_t* src_uyvy,
   2875                      int src_stride_uyvy,
   2876                      uint8_t* dst_u,
   2877                      uint8_t* dst_v,
   2878                      int width);
   2879 void UYVYToUV422Row_MSA(const uint8_t* src_uyvy,
   2880                         uint8_t* dst_u,
   2881                         uint8_t* dst_v,
   2882                         int width);
   2883 
   // _C-suffixed prototypes of the three UYVY row functions. UYVYToUVRow_C
   // takes a source stride (src_stride_uyvy); UYVYToUV422Row_C does not.
   // NOTE(review): the _C suffix presumably marks the portable C
   // implementation - confirm.
   2884 void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width);
   2885 void UYVYToUVRow_C(const uint8_t* src_uyvy,
   2886                    int src_stride_uyvy,
   2887                    uint8_t* dst_u,
   2888                    uint8_t* dst_v,
   2889                    int width);
   2890 void UYVYToUV422Row_C(const uint8_t* src_uyvy,
   2891                       uint8_t* dst_u,
   2892                       uint8_t* dst_v,
   2893                       int width);
   // "Any" prototypes of the UYVY row functions for the AVX2, SSE2, NEON and
   // MSA backends. The source is named src_ptr and the stride src_stride_ptr;
   // only the ...ToUVRow prototypes take a stride.
   // NOTE(review): the _Any_ variants presumably accept widths that are not a
   // multiple of the SIMD step - confirm against their definitions.
   2894 void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2895 void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr,
   2896                           int src_stride_ptr,
   2897                           uint8_t* dst_u,
   2898                           uint8_t* dst_v,
   2899                           int width);
   2900 void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr,
   2901                              uint8_t* dst_u,
   2902                              uint8_t* dst_v,
   2903                              int width);
   2904 void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2905 void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr,
   2906                           int src_stride_ptr,
   2907                           uint8_t* dst_u,
   2908                           uint8_t* dst_v,
   2909                           int width);
   2910 void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr,
   2911                              uint8_t* dst_u,
   2912                              uint8_t* dst_v,
   2913                              int width);
   2914 void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2915 void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr,
   2916                           int src_stride_ptr,
   2917                           uint8_t* dst_u,
   2918                           uint8_t* dst_v,
   2919                           int width);
   2920 void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr,
   2921                              uint8_t* dst_u,
   2922                              uint8_t* dst_v,
   2923                              int width);
   2924 void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
   2925 void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr,
   2926                          int src_stride_ptr,
   2927                          uint8_t* dst_u,
   2928                          uint8_t* dst_v,
   2929                          int width);
   2930 void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr,
   2931                             uint8_t* dst_u,
   2932                             uint8_t* dst_v,
   2933                             int width);
   2934 
   // I422ToYUY2Row / I422ToUYVYRow prototypes for the C, SSE2, AVX2, NEON and
   // MSA backends, each SIMD backend followed by its _Any_ form. Every
   // prototype takes three const source rows, one destination row and a width.
   // Naming differs by variant: the _C prototypes call the destination
   // dst_frame, the SIMD prototypes dst_yuy2/dst_uyvy, and the _Any_ prototypes
   // use y_buf/u_buf/v_buf/dst_ptr.
   // NOTE(review): presumably these pack planar Y, U and V rows into one
   // interleaved row - confirm against the definitions.
   2935 void I422ToYUY2Row_C(const uint8_t* src_y,
   2936                      const uint8_t* src_u,
   2937                      const uint8_t* src_v,
   2938                      uint8_t* dst_frame,
   2939                      int width);
   2940 void I422ToUYVYRow_C(const uint8_t* src_y,
   2941                      const uint8_t* src_u,
   2942                      const uint8_t* src_v,
   2943                      uint8_t* dst_frame,
   2944                      int width);
   2945 void I422ToYUY2Row_SSE2(const uint8_t* src_y,
   2946                         const uint8_t* src_u,
   2947                         const uint8_t* src_v,
   2948                         uint8_t* dst_yuy2,
   2949                         int width);
   2950 void I422ToUYVYRow_SSE2(const uint8_t* src_y,
   2951                         const uint8_t* src_u,
   2952                         const uint8_t* src_v,
   2953                         uint8_t* dst_uyvy,
   2954                         int width);
   2955 void I422ToYUY2Row_Any_SSE2(const uint8_t* y_buf,
   2956                             const uint8_t* u_buf,
   2957                             const uint8_t* v_buf,
   2958                             uint8_t* dst_ptr,
   2959                             int width);
   2960 void I422ToUYVYRow_Any_SSE2(const uint8_t* y_buf,
   2961                             const uint8_t* u_buf,
   2962                             const uint8_t* v_buf,
   2963                             uint8_t* dst_ptr,
   2964                             int width);
   2965 void I422ToYUY2Row_AVX2(const uint8_t* src_y,
   2966                         const uint8_t* src_u,
   2967                         const uint8_t* src_v,
   2968                         uint8_t* dst_yuy2,
   2969                         int width);
   2970 void I422ToUYVYRow_AVX2(const uint8_t* src_y,
   2971                         const uint8_t* src_u,
   2972                         const uint8_t* src_v,
   2973                         uint8_t* dst_uyvy,
   2974                         int width);
   2975 void I422ToYUY2Row_Any_AVX2(const uint8_t* y_buf,
   2976                             const uint8_t* u_buf,
   2977                             const uint8_t* v_buf,
   2978                             uint8_t* dst_ptr,
   2979                             int width);
   2980 void I422ToUYVYRow_Any_AVX2(const uint8_t* y_buf,
   2981                             const uint8_t* u_buf,
   2982                             const uint8_t* v_buf,
   2983                             uint8_t* dst_ptr,
   2984                             int width);
   2985 void I422ToYUY2Row_NEON(const uint8_t* src_y,
   2986                         const uint8_t* src_u,
   2987                         const uint8_t* src_v,
   2988                         uint8_t* dst_yuy2,
   2989                         int width);
   2990 void I422ToUYVYRow_NEON(const uint8_t* src_y,
   2991                         const uint8_t* src_u,
   2992                         const uint8_t* src_v,
   2993                         uint8_t* dst_uyvy,
   2994                         int width);
   2995 void I422ToYUY2Row_Any_NEON(const uint8_t* y_buf,
   2996                             const uint8_t* u_buf,
   2997                             const uint8_t* v_buf,
   2998                             uint8_t* dst_ptr,
   2999                             int width);
   3000 void I422ToUYVYRow_Any_NEON(const uint8_t* y_buf,
   3001                             const uint8_t* u_buf,
   3002                             const uint8_t* v_buf,
   3003                             uint8_t* dst_ptr,
   3004                             int width);
   3005 void I422ToYUY2Row_MSA(const uint8_t* src_y,
   3006                        const uint8_t* src_u,
   3007                        const uint8_t* src_v,
   3008                        uint8_t* dst_yuy2,
   3009                        int width);
   3010 void I422ToUYVYRow_MSA(const uint8_t* src_y,
   3011                        const uint8_t* src_u,
   3012                        const uint8_t* src_v,
   3013                        uint8_t* dst_uyvy,
   3014                        int width);
   3015 void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf,
   3016                            const uint8_t* u_buf,
   3017                            const uint8_t* v_buf,
   3018                            uint8_t* dst_ptr,
   3019                            int width);
   3020 void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf,
   3021                            const uint8_t* u_buf,
   3022                            const uint8_t* v_buf,
   3023                            uint8_t* dst_ptr,
   3024                            int width);
   3025 
   3026 // Effects related row functions.
   // ARGBAttenuateRow prototypes for the C, SSSE3, AVX2, NEON and MSA backends,
   // followed by the _Any_ form of each SIMD backend. All take a const source
   // row, a destination row and a width.
   // NOTE(review): presumably premultiplies the colour channels by alpha -
   // confirm against the definitions.
   3027 void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
   3028 void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb,
   3029                             uint8_t* dst_argb,
   3030                             int width);
   3031 void ARGBAttenuateRow_AVX2(const uint8_t* src_argb,
   3032                            uint8_t* dst_argb,
   3033                            int width);
   3034 void ARGBAttenuateRow_NEON(const uint8_t* src_argb,
   3035                            uint8_t* dst_argb,
   3036                            int width);
   3037 void ARGBAttenuateRow_MSA(const uint8_t* src_argb,
   3038                           uint8_t* dst_argb,
   3039                           int width);
   3040 void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr,
   3041                                 uint8_t* dst_ptr,
   3042                                 int width);
   3043 void ARGBAttenuateRow_Any_AVX2(const uint8_t* src_ptr,
   3044                                uint8_t* dst_ptr,
   3045                                int width);
   3046 void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr,
   3047                                uint8_t* dst_ptr,
   3048                                int width);
   3049 void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr,
   3050                               uint8_t* dst_ptr,
   3051                               int width);
   3052 
   3053 // Inverse table for unattenuate, shared by C and SSE2.
   // fixed_invtbl8 is a 256-entry uint32_t table defined elsewhere.
   // NOTE(review): presumably indexed by the 8-bit alpha value - confirm.
   3054 extern const uint32_t fixed_invtbl8[256];
   // ARGBUnattenuateRow prototypes for the C, SSE2 and AVX2 backends, plus the
   // _Any_ forms of the two SIMD backends. No NEON or MSA prototype is declared
   // in this group.
   3055 void ARGBUnattenuateRow_C(const uint8_t* src_argb,
   3056                           uint8_t* dst_argb,
   3057                           int width);
   3058 void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb,
   3059                              uint8_t* dst_argb,
   3060                              int width);
   3061 void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
   3062                              uint8_t* dst_argb,
   3063                              int width);
   3064 void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_ptr,
   3065                                  uint8_t* dst_ptr,
   3066                                  int width);
   3067 void ARGBUnattenuateRow_Any_AVX2(const uint8_t* src_ptr,
   3068                                  uint8_t* dst_ptr,
   3069                                  int width);
   3070 
   // ARGBGrayRow prototypes for the C, SSSE3, NEON and MSA backends. Each
   // takes a const source row, a separate destination row and a width.
   // NOTE(review): presumably converts each ARGB pixel to gray - confirm
   // against the definitions.
   3071 void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width);
   3072 void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width);
   3073 void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width);
   3074 void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width);
   3075 
        // ARGBSepiaRow variants (_C, _SSSE3, _NEON, _MSA). They take only a
        // writable dst_argb row and a width; there is no separate source pointer.
        // NOTE(review): this suggests the row is modified in place -- confirm
        // against the definitions.
   3076 void ARGBSepiaRow_C(uint8_t* dst_argb, int width);
   3077 void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width);
   3078 void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width);
   3079 void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width);
   3080 
        // ARGBColorMatrixRow variants (_C, _SSSE3, _NEON, _MSA). Besides the
        // source row, destination row and width, each takes a read-only array of
        // signed 8-bit values, matrix_argb.
        // NOTE(review): the number of matrix_argb entries and their layout are not
        // visible in this header -- confirm against the definitions.
   3081 void ARGBColorMatrixRow_C(const uint8_t* src_argb,
   3082                           uint8_t* dst_argb,
   3083                           const int8_t* matrix_argb,
   3084                           int width);
   3085 void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb,
   3086                               uint8_t* dst_argb,
   3087                               const int8_t* matrix_argb,
   3088                               int width);
   3089 void ARGBColorMatrixRow_NEON(const uint8_t* src_argb,
   3090                              uint8_t* dst_argb,
   3091                              const int8_t* matrix_argb,
   3092                              int width);
   3093 void ARGBColorMatrixRow_MSA(const uint8_t* src_argb,
   3094                             uint8_t* dst_argb,
   3095                             const int8_t* matrix_argb,
   3096                             int width);
   3097 
        // ARGBColorTableRow variants (_C, _X86). Arguments: a writable dst_argb
        // row, a read-only table_argb array and a width. There is no separate
        // source row.
        // NOTE(review): presumably dst_argb is remapped in place through
        // table_argb; the table size is not visible here -- confirm.
   3098 void ARGBColorTableRow_C(uint8_t* dst_argb,
   3099                          const uint8_t* table_argb,
   3100                          int width);
   3101 void ARGBColorTableRow_X86(uint8_t* dst_argb,
   3102                            const uint8_t* table_argb,
   3103                            int width);
   3104 
        // RGBColorTableRow variants (_C, _X86). Same argument list as
        // ARGBColorTableRow: writable dst_argb row, read-only table_argb array,
        // width.
        // NOTE(review): how this differs from ARGBColorTableRow (e.g. treatment of
        // the alpha channel) cannot be told from the declarations -- confirm.
   3105 void RGBColorTableRow_C(uint8_t* dst_argb,
   3106                         const uint8_t* table_argb,
   3107                         int width);
   3108 void RGBColorTableRow_X86(uint8_t* dst_argb,
   3109                           const uint8_t* table_argb,
   3110                           int width);
   3111 
        // ARGBQuantizeRow variants (_C, _SSE2, _NEON, _MSA). Arguments: a writable
        // dst_argb row, three int parameters (scale, interval_size,
        // interval_offset) and a width. There is no separate source row.
        // NOTE(review): the quantization formula and the fixed-point format of
        // scale are not visible here -- confirm against the definitions.
   3112 void ARGBQuantizeRow_C(uint8_t* dst_argb,
   3113                        int scale,
   3114                        int interval_size,
   3115                        int interval_offset,
   3116                        int width);
   3117 void ARGBQuantizeRow_SSE2(uint8_t* dst_argb,
   3118                           int scale,
   3119                           int interval_size,
   3120                           int interval_offset,
   3121                           int width);
   3122 void ARGBQuantizeRow_NEON(uint8_t* dst_argb,
   3123                           int scale,
   3124                           int interval_size,
   3125                           int interval_offset,
   3126                           int width);
   3127 void ARGBQuantizeRow_MSA(uint8_t* dst_argb,
   3128                          int scale,
   3129                          int interval_size,
   3130                          int interval_offset,
   3131                          int width);
   3132 
        // ARGBShadeRow variants (_C, _SSE2, _NEON, _MSA). Arguments: read-only
        // src_argb row, writable dst_argb row, width, then a 32-bit unsigned
        // value. Note that width comes before value in the parameter list.
        // NOTE(review): presumably value packs per-channel factors -- confirm
        // against the definitions.
   3133 void ARGBShadeRow_C(const uint8_t* src_argb,
   3134                     uint8_t* dst_argb,
   3135                     int width,
   3136                     uint32_t value);
   3137 void ARGBShadeRow_SSE2(const uint8_t* src_argb,
   3138                        uint8_t* dst_argb,
   3139                        int width,
   3140                        uint32_t value);
   3141 void ARGBShadeRow_NEON(const uint8_t* src_argb,
   3142                        uint8_t* dst_argb,
   3143                        int width,
   3144                        uint32_t value);
   3145 void ARGBShadeRow_MSA(const uint8_t* src_argb,
   3146                       uint8_t* dst_argb,
   3147                       int width,
   3148                       uint32_t value);
   3149 
   3150 // Used for blur.
        // SSE2 declarations. CumulativeSumToAverageRow takes two read-only int32_t
        // arrays (topleft, botleft), two ints (width, area), a writable uint8_t
        // destination and a count. ComputeCumulativeSumRow takes a read-only
        // uint8_t row, a writable int32_t cumsum array, a read-only
        // previous_cumsum array and a width.
        // NOTE(review): the roles of width versus count in
        // CumulativeSumToAverageRow are not visible here -- confirm.
   3151 void CumulativeSumToAverageRow_SSE2(const int32_t* topleft,
   3152                                     const int32_t* botleft,
   3153                                     int width,
   3154                                     int area,
   3155                                     uint8_t* dst,
   3156                                     int count);
   3157 void ComputeCumulativeSumRow_SSE2(const uint8_t* row,
   3158                                   int32_t* cumsum,
   3159                                   const int32_t* previous_cumsum,
   3160                                   int width);
   3161 
        // C declarations of the same two functions. Parameter types and order
        // match the SSE2 declarations; only the names of the first three
        // CumulativeSumToAverageRow parameters are abbreviated (tl, bl, w).
   3162 void CumulativeSumToAverageRow_C(const int32_t* tl,
   3163                                  const int32_t* bl,
   3164                                  int w,
   3165                                  int area,
   3166                                  uint8_t* dst,
   3167                                  int count);
   3168 void ComputeCumulativeSumRow_C(const uint8_t* row,
   3169                                int32_t* cumsum,
   3170                                const int32_t* previous_cumsum,
   3171                                int width);
   3172 
        // ARGBAffineRow variants (_C, _SSE2), both marked LIBYUV_API. Arguments:
        // read-only src_argb, its stride as an int, a writable dst_argb row, a
        // read-only float array and a width. The float array is named uv_dudv in
        // the C declaration and src_dudv in the SSE2 one; type and position are
        // the same.
        // NOTE(review): presumably the float array holds a start (u, v) and a
        // per-pixel step (du, dv) -- confirm against the definitions.
   3173 LIBYUV_API
   3174 void ARGBAffineRow_C(const uint8_t* src_argb,
   3175                      int src_argb_stride,
   3176                      uint8_t* dst_argb,
   3177                      const float* uv_dudv,
   3178                      int width);
   3179 LIBYUV_API
   3180 void ARGBAffineRow_SSE2(const uint8_t* src_argb,
   3181                         int src_argb_stride,
   3182                         uint8_t* dst_argb,
   3183                         const float* src_dudv,
   3184                         int width);
   3185 
   3186 // Used for I420Scale, ARGBScale, and ARGBInterpolate.
        // Every variant takes, in this order: writable dst_ptr, read-only src_ptr,
        // a ptrdiff_t stride, an int width and an int source_y_fraction. Note that
        // the destination comes first. The SSSE3, AVX2 and NEON declarations name
        // the width parameter dst_width; the _C and _MSA ones name it width.
        // NOTE(review): presumably blends the row at src_ptr with the row at
        // src_ptr + src_stride according to source_y_fraction -- confirm against
        // the definitions, including the valid range of source_y_fraction.
   3187 void InterpolateRow_C(uint8_t* dst_ptr,
   3188                       const uint8_t* src_ptr,
   3189                       ptrdiff_t src_stride,
   3190                       int width,
   3191                       int source_y_fraction);
   3192 void InterpolateRow_SSSE3(uint8_t* dst_ptr,
   3193                           const uint8_t* src_ptr,
   3194                           ptrdiff_t src_stride,
   3195                           int dst_width,
   3196                           int source_y_fraction);
   3197 void InterpolateRow_AVX2(uint8_t* dst_ptr,
   3198                          const uint8_t* src_ptr,
   3199                          ptrdiff_t src_stride,
   3200                          int dst_width,
   3201                          int source_y_fraction);
   3202 void InterpolateRow_NEON(uint8_t* dst_ptr,
   3203                          const uint8_t* src_ptr,
   3204                          ptrdiff_t src_stride,
   3205                          int dst_width,
   3206                          int source_y_fraction);
   3207 void InterpolateRow_MSA(uint8_t* dst_ptr,
   3208                         const uint8_t* src_ptr,
   3209                         ptrdiff_t src_stride,
   3210                         int width,
   3211                         int source_y_fraction);
        // "_Any_" wrappers (NEON, SSSE3, AVX2, MSA). Same types and order as the
        // declarations above; the stride parameter is named src_stride_ptr here.
   3212 void InterpolateRow_Any_NEON(uint8_t* dst_ptr,
   3213                              const uint8_t* src_ptr,
   3214                              ptrdiff_t src_stride_ptr,
   3215                              int width,
   3216                              int source_y_fraction);
   3217 void InterpolateRow_Any_SSSE3(uint8_t* dst_ptr,
   3218                               const uint8_t* src_ptr,
   3219                               ptrdiff_t src_stride_ptr,
   3220                               int width,
   3221                               int source_y_fraction);
   3222 void InterpolateRow_Any_AVX2(uint8_t* dst_ptr,
   3223                              const uint8_t* src_ptr,
   3224                              ptrdiff_t src_stride_ptr,
   3225                              int width,
   3226                              int source_y_fraction);
   3227 void InterpolateRow_Any_MSA(uint8_t* dst_ptr,
   3228                             const uint8_t* src_ptr,
   3229                             ptrdiff_t src_stride_ptr,
   3230                             int width,
   3231                             int source_y_fraction);
   3232 
        // 16-bit counterpart of InterpolateRow_C: same parameter list, but the
        // destination and source rows are uint16_t. Only a _C variant is declared
        // in this part of the header.
        // NOTE(review): whether src_stride is counted in elements or bytes is not
        // visible here -- confirm against the definition.
   3233 void InterpolateRow_16_C(uint16_t* dst_ptr,
   3234                          const uint16_t* src_ptr,
   3235                          ptrdiff_t src_stride,
   3236                          int width,
   3237                          int source_y_fraction);
   3238 
   3239 // Sobel images.
        // SobelXRow variants (_C, _SSE2, _NEON, _MSA): three read-only source rows
        // (src_y0, src_y1, src_y2), one writable dst_sobelx row, and a width.
   3240 void SobelXRow_C(const uint8_t* src_y0,
   3241                  const uint8_t* src_y1,
   3242                  const uint8_t* src_y2,
   3243                  uint8_t* dst_sobelx,
   3244                  int width);
   3245 void SobelXRow_SSE2(const uint8_t* src_y0,
   3246                     const uint8_t* src_y1,
   3247                     const uint8_t* src_y2,
   3248                     uint8_t* dst_sobelx,
   3249                     int width);
   3250 void SobelXRow_NEON(const uint8_t* src_y0,
   3251                     const uint8_t* src_y1,
   3252                     const uint8_t* src_y2,
   3253                     uint8_t* dst_sobelx,
   3254                     int width);
   3255 void SobelXRow_MSA(const uint8_t* src_y0,
   3256                    const uint8_t* src_y1,
   3257                    const uint8_t* src_y2,
   3258                    uint8_t* dst_sobelx,
   3259                    int width);
        // SobelYRow variants: two read-only source rows (src_y0, src_y1), one
        // writable dst_sobely row, and a width.
   3260 void SobelYRow_C(const uint8_t* src_y0,
   3261                  const uint8_t* src_y1,
   3262                  uint8_t* dst_sobely,
   3263                  int width);
   3264 void SobelYRow_SSE2(const uint8_t* src_y0,
   3265                     const uint8_t* src_y1,
   3266                     uint8_t* dst_sobely,
   3267                     int width);
   3268 void SobelYRow_NEON(const uint8_t* src_y0,
   3269                     const uint8_t* src_y1,
   3270                     uint8_t* dst_sobely,
   3271                     int width);
   3272 void SobelYRow_MSA(const uint8_t* src_y0,
   3273                    const uint8_t* src_y1,
   3274                    uint8_t* dst_sobely,
   3275                    int width);
        // SobelRow variants: read a src_sobelx row and a src_sobely row and write
        // a dst_argb row.
   3276 void SobelRow_C(const uint8_t* src_sobelx,
   3277                 const uint8_t* src_sobely,
   3278                 uint8_t* dst_argb,
   3279                 int width);
   3280 void SobelRow_SSE2(const uint8_t* src_sobelx,
   3281                    const uint8_t* src_sobely,
   3282                    uint8_t* dst_argb,
   3283                    int width);
   3284 void SobelRow_NEON(const uint8_t* src_sobelx,
   3285                    const uint8_t* src_sobely,
   3286                    uint8_t* dst_argb,
   3287                    int width);
   3288 void SobelRow_MSA(const uint8_t* src_sobelx,
   3289                   const uint8_t* src_sobely,
   3290                   uint8_t* dst_argb,
   3291                   int width);
        // SobelToPlaneRow variants: same two inputs as SobelRow, but the
        // destination parameter is dst_y rather than dst_argb.
   3292 void SobelToPlaneRow_C(const uint8_t* src_sobelx,
   3293                        const uint8_t* src_sobely,
   3294                        uint8_t* dst_y,
   3295                        int width);
   3296 void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx,
   3297                           const uint8_t* src_sobely,
   3298                           uint8_t* dst_y,
   3299                           int width);
   3300 void SobelToPlaneRow_NEON(const uint8_t* src_sobelx,
   3301                           const uint8_t* src_sobely,
   3302                           uint8_t* dst_y,
   3303                           int width);
   3304 void SobelToPlaneRow_MSA(const uint8_t* src_sobelx,
   3305                          const uint8_t* src_sobely,
   3306                          uint8_t* dst_y,
   3307                          int width);
        // SobelXYRow variants: same parameter list as SobelRow.
        // NOTE(review): how the dst_argb output differs from SobelRow is not
        // visible from the declarations -- confirm against the definitions.
   3308 void SobelXYRow_C(const uint8_t* src_sobelx,
   3309                   const uint8_t* src_sobely,
   3310                   uint8_t* dst_argb,
   3311                   int width);
   3312 void SobelXYRow_SSE2(const uint8_t* src_sobelx,
   3313                      const uint8_t* src_sobely,
   3314                      uint8_t* dst_argb,
   3315                      int width);
   3316 void SobelXYRow_NEON(const uint8_t* src_sobelx,
   3317                      const uint8_t* src_sobely,
   3318                      uint8_t* dst_argb,
   3319                      int width);
   3320 void SobelXYRow_MSA(const uint8_t* src_sobelx,
   3321                     const uint8_t* src_sobely,
   3322                     uint8_t* dst_argb,
   3323                     int width);
        // "_Any_" wrappers for SobelRow, SobelToPlaneRow and SobelXYRow (SSE2,
        // NEON, MSA). The parameter names here are generic (y_buf, uv_buf,
        // dst_ptr); the two inputs occupy the same positions as src_sobelx and
        // src_sobely in the declarations above.
   3324 void SobelRow_Any_SSE2(const uint8_t* y_buf,
   3325                        const uint8_t* uv_buf,
   3326                        uint8_t* dst_ptr,
   3327                        int width);
   3328 void SobelRow_Any_NEON(const uint8_t* y_buf,
   3329                        const uint8_t* uv_buf,
   3330                        uint8_t* dst_ptr,
   3331                        int width);
   3332 void SobelRow_Any_MSA(const uint8_t* y_buf,
   3333                       const uint8_t* uv_buf,
   3334                       uint8_t* dst_ptr,
   3335                       int width);
   3336 void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf,
   3337                               const uint8_t* uv_buf,
   3338                               uint8_t* dst_ptr,
   3339                               int width);
   3340 void SobelToPlaneRow_Any_NEON(const uint8_t* y_buf,
   3341                               const uint8_t* uv_buf,
   3342                               uint8_t* dst_ptr,
   3343                               int width);
   3344 void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf,
   3345                              const uint8_t* uv_buf,
   3346                              uint8_t* dst_ptr,
   3347                              int width);
   3348 void SobelXYRow_Any_SSE2(const uint8_t* y_buf,
   3349                          const uint8_t* uv_buf,
   3350                          uint8_t* dst_ptr,
   3351                          int width);
   3352 void SobelXYRow_Any_NEON(const uint8_t* y_buf,
   3353                          const uint8_t* uv_buf,
   3354                          uint8_t* dst_ptr,
   3355                          int width);
   3356 void SobelXYRow_Any_MSA(const uint8_t* y_buf,
   3357                         const uint8_t* uv_buf,
   3358                         uint8_t* dst_ptr,
   3359                         int width);
   3360 
        // ARGBPolynomialRow variants (_C, _SSE2, _AVX2). Arguments: read-only
        // src_argb row, writable dst_argb row, a read-only float array poly, and
        // a width.
        // NOTE(review): the number of coefficients in poly and their ordering are
        // not visible in this header -- confirm against the definitions.
   3361 void ARGBPolynomialRow_C(const uint8_t* src_argb,
   3362                          uint8_t* dst_argb,
   3363                          const float* poly,
   3364                          int width);
   3365 void ARGBPolynomialRow_SSE2(const uint8_t* src_argb,
   3366                             uint8_t* dst_argb,
   3367                             const float* poly,
   3368                             int width);
   3369 void ARGBPolynomialRow_AVX2(const uint8_t* src_argb,
   3370                             uint8_t* dst_argb,
   3371                             const float* poly,
   3372                             int width);
   3373 
   3374 // Scale and convert to half float.
        // Every HalfFloatRow / HalfFloat1Row declaration takes a read-only
        // uint16_t source, a writable uint16_t destination, a float and a width.
        // The float is named scale in most declarations and param in several of
        // the "_Any_" ones. HalfFloatRow is declared for C, SSE2, AVX2, F16C, NEON
        // and MSA; HalfFloat1Row only for F16C and NEON.
        // NOTE(review): presumably HalfFloat1Row is a special case for a scale of
        // 1 -- confirm against the definitions and callers.
   3375 void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width);
   3376 void HalfFloatRow_SSE2(const uint16_t* src,
   3377                        uint16_t* dst,
   3378                        float scale,
   3379                        int width);
   3380 void HalfFloatRow_Any_SSE2(const uint16_t* src_ptr,
   3381                            uint16_t* dst_ptr,
   3382                            float param,
   3383                            int width);
   3384 void HalfFloatRow_AVX2(const uint16_t* src,
   3385                        uint16_t* dst,
   3386                        float scale,
   3387                        int width);
   3388 void HalfFloatRow_Any_AVX2(const uint16_t* src_ptr,
   3389                            uint16_t* dst_ptr,
   3390                            float param,
   3391                            int width);
   3392 void HalfFloatRow_F16C(const uint16_t* src,
   3393                        uint16_t* dst,
   3394                        float scale,
   3395                        int width);
   3396 void HalfFloatRow_Any_F16C(const uint16_t* src,
   3397                            uint16_t* dst,
   3398                            float scale,
   3399                            int width);
   3400 void HalfFloat1Row_F16C(const uint16_t* src,
   3401                         uint16_t* dst,
   3402                         float scale,
   3403                         int width);
   3404 void HalfFloat1Row_Any_F16C(const uint16_t* src,
   3405                             uint16_t* dst,
   3406                             float scale,
   3407                             int width);
   3408 void HalfFloatRow_NEON(const uint16_t* src,
   3409                        uint16_t* dst,
   3410                        float scale,
   3411                        int width);
   3412 void HalfFloatRow_Any_NEON(const uint16_t* src_ptr,
   3413                            uint16_t* dst_ptr,
   3414                            float param,
   3415                            int width);
   3416 void HalfFloat1Row_NEON(const uint16_t* src,
   3417                         uint16_t* dst,
   3418                         float scale,
   3419                         int width);
   3420 void HalfFloat1Row_Any_NEON(const uint16_t* src_ptr,
   3421                             uint16_t* dst_ptr,
   3422                             float param,
   3423                             int width);
   3424 void HalfFloatRow_MSA(const uint16_t* src,
   3425                       uint16_t* dst,
   3426                       float scale,
   3427                       int width);
   3428 void HalfFloatRow_Any_MSA(const uint16_t* src_ptr,
   3429                           uint16_t* dst_ptr,
   3430                           float param,
   3431                           int width);
        // ByteToFloatRow variants (_C, _NEON) and the NEON "_Any_" wrapper:
        // read-only uint8_t source, writable float destination, a float (scale,
        // or param in the "_Any_" declaration) and a width.
   3432 void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width);
   3433 void ByteToFloatRow_NEON(const uint8_t* src,
   3434                          float* dst,
   3435                          float scale,
   3436                          int width);
   3437 void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr,
   3438                              float* dst_ptr,
   3439                              float param,
   3440                              int width);
   3441 
        // ARGBLumaColorTableRow variants (_C, _SSSE3). Arguments: read-only
        // src_argb row, writable dst_argb row, width, a read-only uint8_t array
        // luma, and a 32-bit unsigned lumacoeff. Note that width is the third
        // parameter, not the last.
        // NOTE(review): the size of the luma table and the packing of lumacoeff
        // are not visible in this header -- confirm against the definitions.
   3442 void ARGBLumaColorTableRow_C(const uint8_t* src_argb,
   3443                              uint8_t* dst_argb,
   3444                              int width,
   3445                              const uint8_t* luma,
   3446                              uint32_t lumacoeff);
   3447 void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb,
   3448                                  uint8_t* dst_argb,
   3449                                  int width,
   3450                                  const uint8_t* luma,
   3451                                  uint32_t lumacoeff);
   3452 
        // Float sample helpers, each with _C and _NEON variants. All take a
        // read-only float source, a writable float destination, a float scale and
        // a width. ScaleMaxSamples and ScaleSumSamples return a float;
        // ScaleSamples returns nothing.
        // NOTE(review): presumably the returned float is the maximum / the sum
        // over the row -- confirm against the definitions, including whether it
        // is taken before or after scaling.
   3453 float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width);
   3454 float ScaleMaxSamples_NEON(const float* src,
   3455                            float* dst,
   3456                            float scale,
   3457                            int width);
   3458 float ScaleSumSamples_C(const float* src, float* dst, float scale, int width);
   3459 float ScaleSumSamples_NEON(const float* src,
   3460                            float* dst,
   3461                            float scale,
   3462                            int width);
   3463 void ScaleSamples_C(const float* src, float* dst, float scale, int width);
   3464 void ScaleSamples_NEON(const float* src, float* dst, float scale, int width);
   3465 
   3466 #ifdef __cplusplus
   3467 }  // extern "C"
   3468 }  // namespace libyuv
   3469 #endif
   3470 
   3471 #endif  // INCLUDE_LIBYUV_ROW_H_
   3472