Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS. All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/row.h"
     12 
     13 #include <string.h>  // For memcpy and memset.
     14 
     15 #include "libyuv/basic_types.h"
     16 
     17 #ifdef __cplusplus
     18 namespace libyuv {
     19 extern "C" {
     20 #endif
     21 
     22 // llvm x86 is poor at ternary operator, so use branchless min/max.
     23 
     24 #define USE_BRANCHLESS 1
     25 #if USE_BRANCHLESS
     26 static __inline int32 clamp0(int32 v) {
     27   return ((-(v) >> 31) & (v));
     28 }
     29 
     30 static __inline int32 clamp255(int32 v) {
     31   return (((255 - (v)) >> 31) | (v)) & 255;
     32 }
     33 
     34 static __inline uint32 Clamp(int32 val) {
     35   int v = clamp0(val);
     36   return (uint32)(clamp255(v));
     37 }
     38 
     39 static __inline uint32 Abs(int32 v) {
     40   int m = v >> 31;
     41   return (v + m) ^ m;
     42 }
     43 #else  // USE_BRANCHLESS
     44 static __inline int32 clamp0(int32 v) {
     45   return (v < 0) ? 0 : v;
     46 }
     47 
     48 static __inline int32 clamp255(int32 v) {
     49   return (v > 255) ? 255 : v;
     50 }
     51 
     52 static __inline uint32 Clamp(int32 val) {
     53   int v = clamp0(val);
     54   return (uint32)(clamp255(v));
     55 }
     56 
     57 static __inline uint32 Abs(int32 v) {
     58   return (v < 0) ? -v : v;
     59 }
     60 #endif  // USE_BRANCHLESS
     61 
     62 #ifdef LIBYUV_LITTLE_ENDIAN
     63 #define WRITEWORD(p, v) *(uint32*)(p) = v
     64 #else
     65 static inline void WRITEWORD(uint8* p, uint32 v) {
     66   p[0] = (uint8)(v & 255);
     67   p[1] = (uint8)((v >> 8) & 255);
     68   p[2] = (uint8)((v >> 16) & 255);
     69   p[3] = (uint8)((v >> 24) & 255);
     70 }
     71 #endif
     72 
     73 void RGB24ToARGBRow_C(const uint8* src_rgb24, uint8* dst_argb, int width) {
     74   int x;
     75   for (x = 0; x < width; ++x) {
     76     uint8 b = src_rgb24[0];
     77     uint8 g = src_rgb24[1];
     78     uint8 r = src_rgb24[2];
     79     dst_argb[0] = b;
     80     dst_argb[1] = g;
     81     dst_argb[2] = r;
     82     dst_argb[3] = 255u;
     83     dst_argb += 4;
     84     src_rgb24 += 3;
     85   }
     86 }
     87 
     88 void RAWToARGBRow_C(const uint8* src_raw, uint8* dst_argb, int width) {
     89   int x;
     90   for (x = 0; x < width; ++x) {
     91     uint8 r = src_raw[0];
     92     uint8 g = src_raw[1];
     93     uint8 b = src_raw[2];
     94     dst_argb[0] = b;
     95     dst_argb[1] = g;
     96     dst_argb[2] = r;
     97     dst_argb[3] = 255u;
     98     dst_argb += 4;
     99     src_raw += 3;
    100   }
    101 }
    102 
    103 void RAWToRGB24Row_C(const uint8* src_raw, uint8* dst_rgb24, int width) {
    104   int x;
    105   for (x = 0; x < width; ++x) {
    106     uint8 r = src_raw[0];
    107     uint8 g = src_raw[1];
    108     uint8 b = src_raw[2];
    109     dst_rgb24[0] = b;
    110     dst_rgb24[1] = g;
    111     dst_rgb24[2] = r;
    112     dst_rgb24 += 3;
    113     src_raw += 3;
    114   }
    115 }
    116 
    117 void RGB565ToARGBRow_C(const uint8* src_rgb565, uint8* dst_argb, int width) {
    118   int x;
    119   for (x = 0; x < width; ++x) {
    120     uint8 b = src_rgb565[0] & 0x1f;
    121     uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    122     uint8 r = src_rgb565[1] >> 3;
    123     dst_argb[0] = (b << 3) | (b >> 2);
    124     dst_argb[1] = (g << 2) | (g >> 4);
    125     dst_argb[2] = (r << 3) | (r >> 2);
    126     dst_argb[3] = 255u;
    127     dst_argb += 4;
    128     src_rgb565 += 2;
    129   }
    130 }
    131 
    132 void ARGB1555ToARGBRow_C(const uint8* src_argb1555, uint8* dst_argb,
    133                          int width) {
    134   int x;
    135   for (x = 0; x < width; ++x) {
    136     uint8 b = src_argb1555[0] & 0x1f;
    137     uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    138     uint8 r = (src_argb1555[1] & 0x7c) >> 2;
    139     uint8 a = src_argb1555[1] >> 7;
    140     dst_argb[0] = (b << 3) | (b >> 2);
    141     dst_argb[1] = (g << 3) | (g >> 2);
    142     dst_argb[2] = (r << 3) | (r >> 2);
    143     dst_argb[3] = -a;
    144     dst_argb += 4;
    145     src_argb1555 += 2;
    146   }
    147 }
    148 
    149 void ARGB4444ToARGBRow_C(const uint8* src_argb4444, uint8* dst_argb,
    150                          int width) {
    151   int x;
    152   for (x = 0; x < width; ++x) {
    153     uint8 b = src_argb4444[0] & 0x0f;
    154     uint8 g = src_argb4444[0] >> 4;
    155     uint8 r = src_argb4444[1] & 0x0f;
    156     uint8 a = src_argb4444[1] >> 4;
    157     dst_argb[0] = (b << 4) | b;
    158     dst_argb[1] = (g << 4) | g;
    159     dst_argb[2] = (r << 4) | r;
    160     dst_argb[3] = (a << 4) | a;
    161     dst_argb += 4;
    162     src_argb4444 += 2;
    163   }
    164 }
    165 
    166 void ARGBToRGB24Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
    167   int x;
    168   for (x = 0; x < width; ++x) {
    169     uint8 b = src_argb[0];
    170     uint8 g = src_argb[1];
    171     uint8 r = src_argb[2];
    172     dst_rgb[0] = b;
    173     dst_rgb[1] = g;
    174     dst_rgb[2] = r;
    175     dst_rgb += 3;
    176     src_argb += 4;
    177   }
    178 }
    179 
    180 void ARGBToRAWRow_C(const uint8* src_argb, uint8* dst_rgb, int width) {
    181   int x;
    182   for (x = 0; x < width; ++x) {
    183     uint8 b = src_argb[0];
    184     uint8 g = src_argb[1];
    185     uint8 r = src_argb[2];
    186     dst_rgb[0] = r;
    187     dst_rgb[1] = g;
    188     dst_rgb[2] = b;
    189     dst_rgb += 3;
    190     src_argb += 4;
    191   }
    192 }
    193 
    194 void ARGBToRGB565Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
    195   int x;
    196   for (x = 0; x < width - 1; x += 2) {
    197     uint8 b0 = src_argb[0] >> 3;
    198     uint8 g0 = src_argb[1] >> 2;
    199     uint8 r0 = src_argb[2] >> 3;
    200     uint8 b1 = src_argb[4] >> 3;
    201     uint8 g1 = src_argb[5] >> 2;
    202     uint8 r1 = src_argb[6] >> 3;
    203     WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
    204               (b1 << 16) | (g1 << 21) | (r1 << 27));
    205     dst_rgb += 4;
    206     src_argb += 8;
    207   }
    208   if (width & 1) {
    209     uint8 b0 = src_argb[0] >> 3;
    210     uint8 g0 = src_argb[1] >> 2;
    211     uint8 r0 = src_argb[2] >> 3;
    212     *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
    213   }
    214 }
    215 
    216 // dither4 is a row of 4 values from 4x4 dither matrix.
    217 // The 4x4 matrix contains values to increase RGB.  When converting to
    218 // fewer bits (565) this provides an ordered dither.
    219 // The order in the 4x4 matrix in first byte is upper left.
    220 // The 4 values are passed as an int, then referenced as an array, so
    221 // endian will not affect order of the original matrix.  But the dither4
    222 // will containing the first pixel in the lower byte for little endian
    223 // or the upper byte for big endian.
    224 void ARGBToRGB565DitherRow_C(const uint8* src_argb, uint8* dst_rgb,
    225                              const uint32 dither4, int width) {
    226   int x;
    227   for (x = 0; x < width - 1; x += 2) {
    228     int dither0 = ((const unsigned char*)(&dither4))[x & 3];
    229     int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3];
    230     uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
    231     uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
    232     uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
    233     uint8 b1 = clamp255(src_argb[4] + dither1) >> 3;
    234     uint8 g1 = clamp255(src_argb[5] + dither1) >> 2;
    235     uint8 r1 = clamp255(src_argb[6] + dither1) >> 3;
    236     WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) |
    237               (b1 << 16) | (g1 << 21) | (r1 << 27));
    238     dst_rgb += 4;
    239     src_argb += 8;
    240   }
    241   if (width & 1) {
    242     int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3];
    243     uint8 b0 = clamp255(src_argb[0] + dither0) >> 3;
    244     uint8 g0 = clamp255(src_argb[1] + dither0) >> 2;
    245     uint8 r0 = clamp255(src_argb[2] + dither0) >> 3;
    246     *(uint16*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
    247   }
    248 }
    249 
    250 void ARGBToARGB1555Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
    251   int x;
    252   for (x = 0; x < width - 1; x += 2) {
    253     uint8 b0 = src_argb[0] >> 3;
    254     uint8 g0 = src_argb[1] >> 3;
    255     uint8 r0 = src_argb[2] >> 3;
    256     uint8 a0 = src_argb[3] >> 7;
    257     uint8 b1 = src_argb[4] >> 3;
    258     uint8 g1 = src_argb[5] >> 3;
    259     uint8 r1 = src_argb[6] >> 3;
    260     uint8 a1 = src_argb[7] >> 7;
    261     *(uint32*)(dst_rgb) =
    262         b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) |
    263         (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31);
    264     dst_rgb += 4;
    265     src_argb += 8;
    266   }
    267   if (width & 1) {
    268     uint8 b0 = src_argb[0] >> 3;
    269     uint8 g0 = src_argb[1] >> 3;
    270     uint8 r0 = src_argb[2] >> 3;
    271     uint8 a0 = src_argb[3] >> 7;
    272     *(uint16*)(dst_rgb) =
    273         b0 | (g0 << 5) | (r0 << 10) | (a0 << 15);
    274   }
    275 }
    276 
    277 void ARGBToARGB4444Row_C(const uint8* src_argb, uint8* dst_rgb, int width) {
    278   int x;
    279   for (x = 0; x < width - 1; x += 2) {
    280     uint8 b0 = src_argb[0] >> 4;
    281     uint8 g0 = src_argb[1] >> 4;
    282     uint8 r0 = src_argb[2] >> 4;
    283     uint8 a0 = src_argb[3] >> 4;
    284     uint8 b1 = src_argb[4] >> 4;
    285     uint8 g1 = src_argb[5] >> 4;
    286     uint8 r1 = src_argb[6] >> 4;
    287     uint8 a1 = src_argb[7] >> 4;
    288     *(uint32*)(dst_rgb) =
    289         b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
    290         (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
    291     dst_rgb += 4;
    292     src_argb += 8;
    293   }
    294   if (width & 1) {
    295     uint8 b0 = src_argb[0] >> 4;
    296     uint8 g0 = src_argb[1] >> 4;
    297     uint8 r0 = src_argb[2] >> 4;
    298     uint8 a0 = src_argb[3] >> 4;
    299     *(uint16*)(dst_rgb) =
    300         b0 | (g0 << 4) | (r0 << 8) | (a0 << 12);
    301   }
    302 }
    303 
    304 static __inline int RGBToY(uint8 r, uint8 g, uint8 b) {
    305   return (66 * r + 129 * g +  25 * b + 0x1080) >> 8;
    306 }
    307 
    308 static __inline int RGBToU(uint8 r, uint8 g, uint8 b) {
    309   return (112 * b - 74 * g - 38 * r + 0x8080) >> 8;
    310 }
    311 static __inline int RGBToV(uint8 r, uint8 g, uint8 b) {
    312   return (112 * r - 94 * g - 18 * b + 0x8080) >> 8;
    313 }
    314 
    315 #define MAKEROWY(NAME, R, G, B, BPP) \
    316 void NAME ## ToYRow_C(const uint8* src_argb0, uint8* dst_y, int width) {       \
    317   int x;                                                                       \
    318   for (x = 0; x < width; ++x) {                                                \
    319     dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]);               \
    320     src_argb0 += BPP;                                                          \
    321     dst_y += 1;                                                                \
    322   }                                                                            \
    323 }                                                                              \
    324 void NAME ## ToUVRow_C(const uint8* src_rgb0, int src_stride_rgb,              \
    325                        uint8* dst_u, uint8* dst_v, int width) {                \
    326   const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
    327   int x;                                                                       \
    328   for (x = 0; x < width - 1; x += 2) {                                         \
    329     uint8 ab = (src_rgb0[B] + src_rgb0[B + BPP] +                              \
    330                src_rgb1[B] + src_rgb1[B + BPP]) >> 2;                          \
    331     uint8 ag = (src_rgb0[G] + src_rgb0[G + BPP] +                              \
    332                src_rgb1[G] + src_rgb1[G + BPP]) >> 2;                          \
    333     uint8 ar = (src_rgb0[R] + src_rgb0[R + BPP] +                              \
    334                src_rgb1[R] + src_rgb1[R + BPP]) >> 2;                          \
    335     dst_u[0] = RGBToU(ar, ag, ab);                                             \
    336     dst_v[0] = RGBToV(ar, ag, ab);                                             \
    337     src_rgb0 += BPP * 2;                                                       \
    338     src_rgb1 += BPP * 2;                                                       \
    339     dst_u += 1;                                                                \
    340     dst_v += 1;                                                                \
    341   }                                                                            \
    342   if (width & 1) {                                                             \
    343     uint8 ab = (src_rgb0[B] + src_rgb1[B]) >> 1;                               \
    344     uint8 ag = (src_rgb0[G] + src_rgb1[G]) >> 1;                               \
    345     uint8 ar = (src_rgb0[R] + src_rgb1[R]) >> 1;                               \
    346     dst_u[0] = RGBToU(ar, ag, ab);                                             \
    347     dst_v[0] = RGBToV(ar, ag, ab);                                             \
    348   }                                                                            \
    349 }
    350 
    351 MAKEROWY(ARGB, 2, 1, 0, 4)
    352 MAKEROWY(BGRA, 1, 2, 3, 4)
    353 MAKEROWY(ABGR, 0, 1, 2, 4)
    354 MAKEROWY(RGBA, 3, 2, 1, 4)
    355 MAKEROWY(RGB24, 2, 1, 0, 3)
    356 MAKEROWY(RAW, 0, 1, 2, 3)
    357 #undef MAKEROWY
    358 
    359 // JPeg uses a variation on BT.601-1 full range
    360 // y =  0.29900 * r + 0.58700 * g + 0.11400 * b
    361 // u = -0.16874 * r - 0.33126 * g + 0.50000 * b  + center
    362 // v =  0.50000 * r - 0.41869 * g - 0.08131 * b  + center
    363 // BT.601 Mpeg range uses:
    364 // b 0.1016 * 255 = 25.908 = 25
    365 // g 0.5078 * 255 = 129.489 = 129
    366 // r 0.2578 * 255 = 65.739 = 66
    367 // JPeg 8 bit Y (not used):
    368 // b 0.11400 * 256 = 29.184 = 29
    369 // g 0.58700 * 256 = 150.272 = 150
    370 // r 0.29900 * 256 = 76.544 = 77
    371 // JPeg 7 bit Y:
    372 // b 0.11400 * 128 = 14.592 = 15
    373 // g 0.58700 * 128 = 75.136 = 75
    374 // r 0.29900 * 128 = 38.272 = 38
    375 // JPeg 8 bit U:
    376 // b  0.50000 * 255 = 127.5 = 127
    377 // g -0.33126 * 255 = -84.4713 = -84
    378 // r -0.16874 * 255 = -43.0287 = -43
    379 // JPeg 8 bit V:
    380 // b -0.08131 * 255 = -20.73405 = -20
    381 // g -0.41869 * 255 = -106.76595 = -107
    382 // r  0.50000 * 255 = 127.5 = 127
    383 
    384 static __inline int RGBToYJ(uint8 r, uint8 g, uint8 b) {
    385   return (38 * r + 75 * g +  15 * b + 64) >> 7;
    386 }
    387 
    388 static __inline int RGBToUJ(uint8 r, uint8 g, uint8 b) {
    389   return (127 * b - 84 * g - 43 * r + 0x8080) >> 8;
    390 }
    391 static __inline int RGBToVJ(uint8 r, uint8 g, uint8 b) {
    392   return (127 * r - 107 * g - 20 * b + 0x8080) >> 8;
    393 }
    394 
    395 #define AVGB(a, b) (((a) + (b) + 1) >> 1)
    396 
    397 #define MAKEROWYJ(NAME, R, G, B, BPP) \
    398 void NAME ## ToYJRow_C(const uint8* src_argb0, uint8* dst_y, int width) {      \
    399   int x;                                                                       \
    400   for (x = 0; x < width; ++x) {                                                \
    401     dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]);              \
    402     src_argb0 += BPP;                                                          \
    403     dst_y += 1;                                                                \
    404   }                                                                            \
    405 }                                                                              \
    406 void NAME ## ToUVJRow_C(const uint8* src_rgb0, int src_stride_rgb,             \
    407                         uint8* dst_u, uint8* dst_v, int width) {               \
    408   const uint8* src_rgb1 = src_rgb0 + src_stride_rgb;                           \
    409   int x;                                                                       \
    410   for (x = 0; x < width - 1; x += 2) {                                         \
    411     uint8 ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]),                            \
    412                     AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP]));               \
    413     uint8 ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]),                            \
    414                     AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP]));               \
    415     uint8 ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]),                            \
    416                     AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP]));               \
    417     dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
    418     dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
    419     src_rgb0 += BPP * 2;                                                       \
    420     src_rgb1 += BPP * 2;                                                       \
    421     dst_u += 1;                                                                \
    422     dst_v += 1;                                                                \
    423   }                                                                            \
    424   if (width & 1) {                                                             \
    425     uint8 ab = AVGB(src_rgb0[B], src_rgb1[B]);                                 \
    426     uint8 ag = AVGB(src_rgb0[G], src_rgb1[G]);                                 \
    427     uint8 ar = AVGB(src_rgb0[R], src_rgb1[R]);                                 \
    428     dst_u[0] = RGBToUJ(ar, ag, ab);                                            \
    429     dst_v[0] = RGBToVJ(ar, ag, ab);                                            \
    430   }                                                                            \
    431 }
    432 
    433 MAKEROWYJ(ARGB, 2, 1, 0, 4)
    434 #undef MAKEROWYJ
    435 
    436 void RGB565ToYRow_C(const uint8* src_rgb565, uint8* dst_y, int width) {
    437   int x;
    438   for (x = 0; x < width; ++x) {
    439     uint8 b = src_rgb565[0] & 0x1f;
    440     uint8 g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    441     uint8 r = src_rgb565[1] >> 3;
    442     b = (b << 3) | (b >> 2);
    443     g = (g << 2) | (g >> 4);
    444     r = (r << 3) | (r >> 2);
    445     dst_y[0] = RGBToY(r, g, b);
    446     src_rgb565 += 2;
    447     dst_y += 1;
    448   }
    449 }
    450 
    451 void ARGB1555ToYRow_C(const uint8* src_argb1555, uint8* dst_y, int width) {
    452   int x;
    453   for (x = 0; x < width; ++x) {
    454     uint8 b = src_argb1555[0] & 0x1f;
    455     uint8 g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    456     uint8 r = (src_argb1555[1] & 0x7c) >> 2;
    457     b = (b << 3) | (b >> 2);
    458     g = (g << 3) | (g >> 2);
    459     r = (r << 3) | (r >> 2);
    460     dst_y[0] = RGBToY(r, g, b);
    461     src_argb1555 += 2;
    462     dst_y += 1;
    463   }
    464 }
    465 
    466 void ARGB4444ToYRow_C(const uint8* src_argb4444, uint8* dst_y, int width) {
    467   int x;
    468   for (x = 0; x < width; ++x) {
    469     uint8 b = src_argb4444[0] & 0x0f;
    470     uint8 g = src_argb4444[0] >> 4;
    471     uint8 r = src_argb4444[1] & 0x0f;
    472     b = (b << 4) | b;
    473     g = (g << 4) | g;
    474     r = (r << 4) | r;
    475     dst_y[0] = RGBToY(r, g, b);
    476     src_argb4444 += 2;
    477     dst_y += 1;
    478   }
    479 }
    480 
    481 void RGB565ToUVRow_C(const uint8* src_rgb565, int src_stride_rgb565,
    482                      uint8* dst_u, uint8* dst_v, int width) {
    483   const uint8* next_rgb565 = src_rgb565 + src_stride_rgb565;
    484   int x;
    485   for (x = 0; x < width - 1; x += 2) {
    486     uint8 b0 = src_rgb565[0] & 0x1f;
    487     uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    488     uint8 r0 = src_rgb565[1] >> 3;
    489     uint8 b1 = src_rgb565[2] & 0x1f;
    490     uint8 g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3);
    491     uint8 r1 = src_rgb565[3] >> 3;
    492     uint8 b2 = next_rgb565[0] & 0x1f;
    493     uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    494     uint8 r2 = next_rgb565[1] >> 3;
    495     uint8 b3 = next_rgb565[2] & 0x1f;
    496     uint8 g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3);
    497     uint8 r3 = next_rgb565[3] >> 3;
    498     uint8 b = (b0 + b1 + b2 + b3);  // 565 * 4 = 787.
    499     uint8 g = (g0 + g1 + g2 + g3);
    500     uint8 r = (r0 + r1 + r2 + r3);
    501     b = (b << 1) | (b >> 6);  // 787 -> 888.
    502     r = (r << 1) | (r >> 6);
    503     dst_u[0] = RGBToU(r, g, b);
    504     dst_v[0] = RGBToV(r, g, b);
    505     src_rgb565 += 4;
    506     next_rgb565 += 4;
    507     dst_u += 1;
    508     dst_v += 1;
    509   }
    510   if (width & 1) {
    511     uint8 b0 = src_rgb565[0] & 0x1f;
    512     uint8 g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3);
    513     uint8 r0 = src_rgb565[1] >> 3;
    514     uint8 b2 = next_rgb565[0] & 0x1f;
    515     uint8 g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3);
    516     uint8 r2 = next_rgb565[1] >> 3;
    517     uint8 b = (b0 + b2);  // 565 * 2 = 676.
    518     uint8 g = (g0 + g2);
    519     uint8 r = (r0 + r2);
    520     b = (b << 2) | (b >> 4);  // 676 -> 888
    521     g = (g << 1) | (g >> 6);
    522     r = (r << 2) | (r >> 4);
    523     dst_u[0] = RGBToU(r, g, b);
    524     dst_v[0] = RGBToV(r, g, b);
    525   }
    526 }
    527 
    528 void ARGB1555ToUVRow_C(const uint8* src_argb1555, int src_stride_argb1555,
    529                        uint8* dst_u, uint8* dst_v, int width) {
    530   const uint8* next_argb1555 = src_argb1555 + src_stride_argb1555;
    531   int x;
    532   for (x = 0; x < width - 1; x += 2) {
    533     uint8 b0 = src_argb1555[0] & 0x1f;
    534     uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    535     uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    536     uint8 b1 = src_argb1555[2] & 0x1f;
    537     uint8 g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    538     uint8 r1 = (src_argb1555[3] & 0x7c) >> 2;
    539     uint8 b2 = next_argb1555[0] & 0x1f;
    540     uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    541     uint8 r2 = (next_argb1555[1] & 0x7c) >> 2;
    542     uint8 b3 = next_argb1555[2] & 0x1f;
    543     uint8 g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    544     uint8 r3 = (next_argb1555[3] & 0x7c) >> 2;
    545     uint8 b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
    546     uint8 g = (g0 + g1 + g2 + g3);
    547     uint8 r = (r0 + r1 + r2 + r3);
    548     b = (b << 1) | (b >> 6);  // 777 -> 888.
    549     g = (g << 1) | (g >> 6);
    550     r = (r << 1) | (r >> 6);
    551     dst_u[0] = RGBToU(r, g, b);
    552     dst_v[0] = RGBToV(r, g, b);
    553     src_argb1555 += 4;
    554     next_argb1555 += 4;
    555     dst_u += 1;
    556     dst_v += 1;
    557   }
    558   if (width & 1) {
    559     uint8 b0 = src_argb1555[0] & 0x1f;
    560     uint8 g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    561     uint8 r0 = (src_argb1555[1] & 0x7c) >> 2;
    562     uint8 b2 = next_argb1555[0] & 0x1f;
    563     uint8 g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    564     uint8 r2 = next_argb1555[1] >> 3;
    565     uint8 b = (b0 + b2);  // 555 * 2 = 666.
    566     uint8 g = (g0 + g2);
    567     uint8 r = (r0 + r2);
    568     b = (b << 2) | (b >> 4);  // 666 -> 888.
    569     g = (g << 2) | (g >> 4);
    570     r = (r << 2) | (r >> 4);
    571     dst_u[0] = RGBToU(r, g, b);
    572     dst_v[0] = RGBToV(r, g, b);
    573   }
    574 }
    575 
    576 void ARGB4444ToUVRow_C(const uint8* src_argb4444, int src_stride_argb4444,
    577                        uint8* dst_u, uint8* dst_v, int width) {
    578   const uint8* next_argb4444 = src_argb4444 + src_stride_argb4444;
    579   int x;
    580   for (x = 0; x < width - 1; x += 2) {
    581     uint8 b0 = src_argb4444[0] & 0x0f;
    582     uint8 g0 = src_argb4444[0] >> 4;
    583     uint8 r0 = src_argb4444[1] & 0x0f;
    584     uint8 b1 = src_argb4444[2] & 0x0f;
    585     uint8 g1 = src_argb4444[2] >> 4;
    586     uint8 r1 = src_argb4444[3] & 0x0f;
    587     uint8 b2 = next_argb4444[0] & 0x0f;
    588     uint8 g2 = next_argb4444[0] >> 4;
    589     uint8 r2 = next_argb4444[1] & 0x0f;
    590     uint8 b3 = next_argb4444[2] & 0x0f;
    591     uint8 g3 = next_argb4444[2] >> 4;
    592     uint8 r3 = next_argb4444[3] & 0x0f;
    593     uint8 b = (b0 + b1 + b2 + b3);  // 444 * 4 = 666.
    594     uint8 g = (g0 + g1 + g2 + g3);
    595     uint8 r = (r0 + r1 + r2 + r3);
    596     b = (b << 2) | (b >> 4);  // 666 -> 888.
    597     g = (g << 2) | (g >> 4);
    598     r = (r << 2) | (r >> 4);
    599     dst_u[0] = RGBToU(r, g, b);
    600     dst_v[0] = RGBToV(r, g, b);
    601     src_argb4444 += 4;
    602     next_argb4444 += 4;
    603     dst_u += 1;
    604     dst_v += 1;
    605   }
    606   if (width & 1) {
    607     uint8 b0 = src_argb4444[0] & 0x0f;
    608     uint8 g0 = src_argb4444[0] >> 4;
    609     uint8 r0 = src_argb4444[1] & 0x0f;
    610     uint8 b2 = next_argb4444[0] & 0x0f;
    611     uint8 g2 = next_argb4444[0] >> 4;
    612     uint8 r2 = next_argb4444[1] & 0x0f;
    613     uint8 b = (b0 + b2);  // 444 * 2 = 555.
    614     uint8 g = (g0 + g2);
    615     uint8 r = (r0 + r2);
    616     b = (b << 3) | (b >> 2);  // 555 -> 888.
    617     g = (g << 3) | (g >> 2);
    618     r = (r << 3) | (r >> 2);
    619     dst_u[0] = RGBToU(r, g, b);
    620     dst_v[0] = RGBToV(r, g, b);
    621   }
    622 }
    623 
    624 void ARGBToUV444Row_C(const uint8* src_argb,
    625                       uint8* dst_u, uint8* dst_v, int width) {
    626   int x;
    627   for (x = 0; x < width; ++x) {
    628     uint8 ab = src_argb[0];
    629     uint8 ag = src_argb[1];
    630     uint8 ar = src_argb[2];
    631     dst_u[0] = RGBToU(ar, ag, ab);
    632     dst_v[0] = RGBToV(ar, ag, ab);
    633     src_argb += 4;
    634     dst_u += 1;
    635     dst_v += 1;
    636   }
    637 }
    638 
    639 void ARGBToUV411Row_C(const uint8* src_argb,
    640                       uint8* dst_u, uint8* dst_v, int width) {
    641   int x;
    642   for (x = 0; x < width - 3; x += 4) {
    643     uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[12]) >> 2;
    644     uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[13]) >> 2;
    645     uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[14]) >> 2;
    646     dst_u[0] = RGBToU(ar, ag, ab);
    647     dst_v[0] = RGBToV(ar, ag, ab);
    648     src_argb += 16;
    649     dst_u += 1;
    650     dst_v += 1;
    651   }
    652   // Odd width handling mimics 'any' function which replicates last pixel.
    653   if ((width & 3) == 3) {
    654     uint8 ab = (src_argb[0] + src_argb[4] + src_argb[8] + src_argb[8]) >> 2;
    655     uint8 ag = (src_argb[1] + src_argb[5] + src_argb[9] + src_argb[9]) >> 2;
    656     uint8 ar = (src_argb[2] + src_argb[6] + src_argb[10] + src_argb[10]) >> 2;
    657     dst_u[0] = RGBToU(ar, ag, ab);
    658     dst_v[0] = RGBToV(ar, ag, ab);
    659   } else if ((width & 3) == 2) {
    660     uint8 ab = (src_argb[0] + src_argb[4]) >> 1;
    661     uint8 ag = (src_argb[1] + src_argb[5]) >> 1;
    662     uint8 ar = (src_argb[2] + src_argb[6]) >> 1;
    663     dst_u[0] = RGBToU(ar, ag, ab);
    664     dst_v[0] = RGBToV(ar, ag, ab);
    665   } else if ((width & 3) == 1) {
    666     uint8 ab = src_argb[0];
    667     uint8 ag = src_argb[1];
    668     uint8 ar = src_argb[2];
    669     dst_u[0] = RGBToU(ar, ag, ab);
    670     dst_v[0] = RGBToV(ar, ag, ab);
    671   }
    672 }
    673 
    674 void ARGBGrayRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
    675   int x;
    676   for (x = 0; x < width; ++x) {
    677     uint8 y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]);
    678     dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
    679     dst_argb[3] = src_argb[3];
    680     dst_argb += 4;
    681     src_argb += 4;
    682   }
    683 }
    684 
    685 // Convert a row of image to Sepia tone.
    686 void ARGBSepiaRow_C(uint8* dst_argb, int width) {
    687   int x;
    688   for (x = 0; x < width; ++x) {
    689     int b = dst_argb[0];
    690     int g = dst_argb[1];
    691     int r = dst_argb[2];
    692     int sb = (b * 17 + g * 68 + r * 35) >> 7;
    693     int sg = (b * 22 + g * 88 + r * 45) >> 7;
    694     int sr = (b * 24 + g * 98 + r * 50) >> 7;
    695     // b does not over flow. a is preserved from original.
    696     dst_argb[0] = sb;
    697     dst_argb[1] = clamp255(sg);
    698     dst_argb[2] = clamp255(sr);
    699     dst_argb += 4;
    700   }
    701 }
    702 
    703 // Apply color matrix to a row of image. Matrix is signed.
    704 // TODO(fbarchard): Consider adding rounding (+32).
    705 void ARGBColorMatrixRow_C(const uint8* src_argb, uint8* dst_argb,
    706                           const int8* matrix_argb, int width) {
    707   int x;
    708   for (x = 0; x < width; ++x) {
    709     int b = src_argb[0];
    710     int g = src_argb[1];
    711     int r = src_argb[2];
    712     int a = src_argb[3];
    713     int sb = (b * matrix_argb[0] + g * matrix_argb[1] +
    714               r * matrix_argb[2] + a * matrix_argb[3]) >> 6;
    715     int sg = (b * matrix_argb[4] + g * matrix_argb[5] +
    716               r * matrix_argb[6] + a * matrix_argb[7]) >> 6;
    717     int sr = (b * matrix_argb[8] + g * matrix_argb[9] +
    718               r * matrix_argb[10] + a * matrix_argb[11]) >> 6;
    719     int sa = (b * matrix_argb[12] + g * matrix_argb[13] +
    720               r * matrix_argb[14] + a * matrix_argb[15]) >> 6;
    721     dst_argb[0] = Clamp(sb);
    722     dst_argb[1] = Clamp(sg);
    723     dst_argb[2] = Clamp(sr);
    724     dst_argb[3] = Clamp(sa);
    725     src_argb += 4;
    726     dst_argb += 4;
    727   }
    728 }
    729 
    730 // Apply color table to a row of image.
    731 void ARGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
    732   int x;
    733   for (x = 0; x < width; ++x) {
    734     int b = dst_argb[0];
    735     int g = dst_argb[1];
    736     int r = dst_argb[2];
    737     int a = dst_argb[3];
    738     dst_argb[0] = table_argb[b * 4 + 0];
    739     dst_argb[1] = table_argb[g * 4 + 1];
    740     dst_argb[2] = table_argb[r * 4 + 2];
    741     dst_argb[3] = table_argb[a * 4 + 3];
    742     dst_argb += 4;
    743   }
    744 }
    745 
    746 // Apply color table to a row of image.
    747 void RGBColorTableRow_C(uint8* dst_argb, const uint8* table_argb, int width) {
    748   int x;
    749   for (x = 0; x < width; ++x) {
    750     int b = dst_argb[0];
    751     int g = dst_argb[1];
    752     int r = dst_argb[2];
    753     dst_argb[0] = table_argb[b * 4 + 0];
    754     dst_argb[1] = table_argb[g * 4 + 1];
    755     dst_argb[2] = table_argb[r * 4 + 2];
    756     dst_argb += 4;
    757   }
    758 }
    759 
    760 void ARGBQuantizeRow_C(uint8* dst_argb, int scale, int interval_size,
    761                        int interval_offset, int width) {
    762   int x;
    763   for (x = 0; x < width; ++x) {
    764     int b = dst_argb[0];
    765     int g = dst_argb[1];
    766     int r = dst_argb[2];
    767     dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset;
    768     dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset;
    769     dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset;
    770     dst_argb += 4;
    771   }
    772 }
    773 
    774 #define REPEAT8(v) (v) | ((v) << 8)
    775 #define SHADE(f, v) v * f >> 24
    776 
    777 void ARGBShadeRow_C(const uint8* src_argb, uint8* dst_argb, int width,
    778                     uint32 value) {
    779   const uint32 b_scale = REPEAT8(value & 0xff);
    780   const uint32 g_scale = REPEAT8((value >> 8) & 0xff);
    781   const uint32 r_scale = REPEAT8((value >> 16) & 0xff);
    782   const uint32 a_scale = REPEAT8(value >> 24);
    783 
    784   int i;
    785   for (i = 0; i < width; ++i) {
    786     const uint32 b = REPEAT8(src_argb[0]);
    787     const uint32 g = REPEAT8(src_argb[1]);
    788     const uint32 r = REPEAT8(src_argb[2]);
    789     const uint32 a = REPEAT8(src_argb[3]);
    790     dst_argb[0] = SHADE(b, b_scale);
    791     dst_argb[1] = SHADE(g, g_scale);
    792     dst_argb[2] = SHADE(r, r_scale);
    793     dst_argb[3] = SHADE(a, a_scale);
    794     src_argb += 4;
    795     dst_argb += 4;
    796   }
    797 }
    798 #undef REPEAT8
    799 #undef SHADE
    800 
    801 #define REPEAT8(v) (v) | ((v) << 8)
    802 #define SHADE(f, v) v * f >> 16
    803 
    804 void ARGBMultiplyRow_C(const uint8* src_argb0, const uint8* src_argb1,
    805                        uint8* dst_argb, int width) {
    806   int i;
    807   for (i = 0; i < width; ++i) {
    808     const uint32 b = REPEAT8(src_argb0[0]);
    809     const uint32 g = REPEAT8(src_argb0[1]);
    810     const uint32 r = REPEAT8(src_argb0[2]);
    811     const uint32 a = REPEAT8(src_argb0[3]);
    812     const uint32 b_scale = src_argb1[0];
    813     const uint32 g_scale = src_argb1[1];
    814     const uint32 r_scale = src_argb1[2];
    815     const uint32 a_scale = src_argb1[3];
    816     dst_argb[0] = SHADE(b, b_scale);
    817     dst_argb[1] = SHADE(g, g_scale);
    818     dst_argb[2] = SHADE(r, r_scale);
    819     dst_argb[3] = SHADE(a, a_scale);
    820     src_argb0 += 4;
    821     src_argb1 += 4;
    822     dst_argb += 4;
    823   }
    824 }
    825 #undef REPEAT8
    826 #undef SHADE
    827 
    828 #define SHADE(f, v) clamp255(v + f)
    829 
    830 void ARGBAddRow_C(const uint8* src_argb0, const uint8* src_argb1,
    831                   uint8* dst_argb, int width) {
    832   int i;
    833   for (i = 0; i < width; ++i) {
    834     const int b = src_argb0[0];
    835     const int g = src_argb0[1];
    836     const int r = src_argb0[2];
    837     const int a = src_argb0[3];
    838     const int b_add = src_argb1[0];
    839     const int g_add = src_argb1[1];
    840     const int r_add = src_argb1[2];
    841     const int a_add = src_argb1[3];
    842     dst_argb[0] = SHADE(b, b_add);
    843     dst_argb[1] = SHADE(g, g_add);
    844     dst_argb[2] = SHADE(r, r_add);
    845     dst_argb[3] = SHADE(a, a_add);
    846     src_argb0 += 4;
    847     src_argb1 += 4;
    848     dst_argb += 4;
    849   }
    850 }
    851 #undef SHADE
    852 
    853 #define SHADE(f, v) clamp0(f - v)
    854 
    855 void ARGBSubtractRow_C(const uint8* src_argb0, const uint8* src_argb1,
    856                        uint8* dst_argb, int width) {
    857   int i;
    858   for (i = 0; i < width; ++i) {
    859     const int b = src_argb0[0];
    860     const int g = src_argb0[1];
    861     const int r = src_argb0[2];
    862     const int a = src_argb0[3];
    863     const int b_sub = src_argb1[0];
    864     const int g_sub = src_argb1[1];
    865     const int r_sub = src_argb1[2];
    866     const int a_sub = src_argb1[3];
    867     dst_argb[0] = SHADE(b, b_sub);
    868     dst_argb[1] = SHADE(g, g_sub);
    869     dst_argb[2] = SHADE(r, r_sub);
    870     dst_argb[3] = SHADE(a, a_sub);
    871     src_argb0 += 4;
    872     src_argb1 += 4;
    873     dst_argb += 4;
    874   }
    875 }
    876 #undef SHADE
    877 
    878 // Sobel functions which mimics SSSE3.
    879 void SobelXRow_C(const uint8* src_y0, const uint8* src_y1, const uint8* src_y2,
    880                  uint8* dst_sobelx, int width) {
    881   int i;
    882   for (i = 0; i < width; ++i) {
    883     int a = src_y0[i];
    884     int b = src_y1[i];
    885     int c = src_y2[i];
    886     int a_sub = src_y0[i + 2];
    887     int b_sub = src_y1[i + 2];
    888     int c_sub = src_y2[i + 2];
    889     int a_diff = a - a_sub;
    890     int b_diff = b - b_sub;
    891     int c_diff = c - c_sub;
    892     int sobel = Abs(a_diff + b_diff * 2 + c_diff);
    893     dst_sobelx[i] = (uint8)(clamp255(sobel));
    894   }
    895 }
    896 
    897 void SobelYRow_C(const uint8* src_y0, const uint8* src_y1,
    898                  uint8* dst_sobely, int width) {
    899   int i;
    900   for (i = 0; i < width; ++i) {
    901     int a = src_y0[i + 0];
    902     int b = src_y0[i + 1];
    903     int c = src_y0[i + 2];
    904     int a_sub = src_y1[i + 0];
    905     int b_sub = src_y1[i + 1];
    906     int c_sub = src_y1[i + 2];
    907     int a_diff = a - a_sub;
    908     int b_diff = b - b_sub;
    909     int c_diff = c - c_sub;
    910     int sobel = Abs(a_diff + b_diff * 2 + c_diff);
    911     dst_sobely[i] = (uint8)(clamp255(sobel));
    912   }
    913 }
    914 
    915 void SobelRow_C(const uint8* src_sobelx, const uint8* src_sobely,
    916                 uint8* dst_argb, int width) {
    917   int i;
    918   for (i = 0; i < width; ++i) {
    919     int r = src_sobelx[i];
    920     int b = src_sobely[i];
    921     int s = clamp255(r + b);
    922     dst_argb[0] = (uint8)(s);
    923     dst_argb[1] = (uint8)(s);
    924     dst_argb[2] = (uint8)(s);
    925     dst_argb[3] = (uint8)(255u);
    926     dst_argb += 4;
    927   }
    928 }
    929 
    930 void SobelToPlaneRow_C(const uint8* src_sobelx, const uint8* src_sobely,
    931                        uint8* dst_y, int width) {
    932   int i;
    933   for (i = 0; i < width; ++i) {
    934     int r = src_sobelx[i];
    935     int b = src_sobely[i];
    936     int s = clamp255(r + b);
    937     dst_y[i] = (uint8)(s);
    938   }
    939 }
    940 
    941 void SobelXYRow_C(const uint8* src_sobelx, const uint8* src_sobely,
    942                   uint8* dst_argb, int width) {
    943   int i;
    944   for (i = 0; i < width; ++i) {
    945     int r = src_sobelx[i];
    946     int b = src_sobely[i];
    947     int g = clamp255(r + b);
    948     dst_argb[0] = (uint8)(b);
    949     dst_argb[1] = (uint8)(g);
    950     dst_argb[2] = (uint8)(r);
    951     dst_argb[3] = (uint8)(255u);
    952     dst_argb += 4;
    953   }
    954 }
    955 
    956 void J400ToARGBRow_C(const uint8* src_y, uint8* dst_argb, int width) {
    957   // Copy a Y to RGB.
    958   int x;
    959   for (x = 0; x < width; ++x) {
    960     uint8 y = src_y[0];
    961     dst_argb[2] = dst_argb[1] = dst_argb[0] = y;
    962     dst_argb[3] = 255u;
    963     dst_argb += 4;
    964     ++src_y;
    965   }
    966 }
    967 
    968 // TODO(fbarchard): Unify these structures to be platform independent.
    969 // TODO(fbarchard): Generate SIMD structures from float matrix.
    970 
    971 // BT.601 YUV to RGB reference
    972 //  R = (Y - 16) * 1.164              - V * -1.596
    973 //  G = (Y - 16) * 1.164 - U *  0.391 - V *  0.813
    974 //  B = (Y - 16) * 1.164 - U * -2.018
    975 
    976 // Y contribution to R,G,B.  Scale and bias.
    977 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
    978 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
    979 
    980 // U and V contributions to R,G,B.
    981 #define UB -128 /* max(-128, round(-2.018 * 64)) */
    982 #define UG 25 /* round(0.391 * 64) */
    983 #define VG 52 /* round(0.813 * 64) */
    984 #define VR -102 /* round(-1.596 * 64) */
    985 
    986 // Bias values to subtract 16 from Y and 128 from U and V.
    987 #define BB (UB * 128            + YGB)
    988 #define BG (UG * 128 + VG * 128 + YGB)
    989 #define BR            (VR * 128 + YGB)
    990 
    991 #if defined(__aarch64__)
    992 const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
    993   { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
    994   { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
    995   { UG, VG, UG, VG, UG, VG, UG, VG },
    996   { UG, VG, UG, VG, UG, VG, UG, VG },
    997   { BB, BG, BR, 0, 0, 0, 0, 0 },
    998   { 0x0101 * YG, 0, 0, 0 }
    999 };
   1000 const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
   1001   { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
   1002   { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
   1003   { VG, UG, VG, UG, VG, UG, VG, UG },
   1004   { VG, UG, VG, UG, VG, UG, VG, UG },
   1005   { BR, BG, BB, 0, 0, 0, 0, 0 },
   1006   { 0x0101 * YG, 0, 0, 0 }
   1007 };
   1008 #elif defined(__arm__)
   1009 const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
   1010   { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
   1011   { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
   1012   { BB, BG, BR, 0, 0, 0, 0, 0 },
   1013   { 0x0101 * YG, 0, 0, 0 }
   1014 };
   1015 const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
   1016   { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
   1017   { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
   1018   { BR, BG, BB, 0, 0, 0, 0, 0 },
   1019   { 0x0101 * YG, 0, 0, 0 }
   1020 };
   1021 #else
   1022 const YuvConstants SIMD_ALIGNED(kYuvI601Constants) = {
   1023   { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
   1024     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
   1025   { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
   1026     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
   1027   { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
   1028     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
   1029   { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
   1030   { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
   1031   { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
   1032   { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
   1033 };
   1034 const YuvConstants SIMD_ALIGNED(kYvuI601Constants) = {
   1035   { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
   1036     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
   1037   { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
   1038     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
   1039   { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
   1040     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
   1041   { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
   1042   { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
   1043   { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
   1044   { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
   1045 };
   1046 #endif
   1047 
   1048 #undef BB
   1049 #undef BG
   1050 #undef BR
   1051 #undef YGB
   1052 #undef UB
   1053 #undef UG
   1054 #undef VG
   1055 #undef VR
   1056 #undef YG
   1057 
   1058 // JPEG YUV to RGB reference
   1059 // *  R = Y                - V * -1.40200
   1060 // *  G = Y - U *  0.34414 - V *  0.71414
   1061 // *  B = Y - U * -1.77200
   1062 
   1063 // Y contribution to R,G,B.  Scale and bias.
   1064 #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
   1065 #define YGB 32  /* 64 / 2 */
   1066 
   1067 // U and V contributions to R,G,B.
   1068 #define UB -113 /* round(-1.77200 * 64) */
   1069 #define UG 22 /* round(0.34414 * 64) */
   1070 #define VG 46 /* round(0.71414  * 64) */
   1071 #define VR -90 /* round(-1.40200 * 64) */
   1072 
   1073 // Bias values to round, and subtract 128 from U and V.
   1074 #define BB (UB * 128            + YGB)
   1075 #define BG (UG * 128 + VG * 128 + YGB)
   1076 #define BR            (VR * 128 + YGB)
   1077 
   1078 #if defined(__aarch64__)
   1079 const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
   1080   { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
   1081   { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
   1082   { UG, VG, UG, VG, UG, VG, UG, VG },
   1083   { UG, VG, UG, VG, UG, VG, UG, VG },
   1084   { BB, BG, BR, 0, 0, 0, 0, 0 },
   1085   { 0x0101 * YG, 0, 0, 0 }
   1086 };
   1087 const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
   1088   { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
   1089   { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
   1090   { VG, UG, VG, UG, VG, UG, VG, UG },
   1091   { VG, UG, VG, UG, VG, UG, VG, UG },
   1092   { BR, BG, BB, 0, 0, 0, 0, 0 },
   1093   { 0x0101 * YG, 0, 0, 0 }
   1094 };
   1095 #elif defined(__arm__)
   1096 const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
   1097   { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
   1098   { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
   1099   { BB, BG, BR, 0, 0, 0, 0, 0 },
   1100   { 0x0101 * YG, 0, 0, 0 }
   1101 };
   1102 const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
   1103   { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
   1104   { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
   1105   { BR, BG, BB, 0, 0, 0, 0, 0 },
   1106   { 0x0101 * YG, 0, 0, 0 }
   1107 };
   1108 #else
   1109 const YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = {
   1110   { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
   1111     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
   1112   { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
   1113     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
   1114   { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
   1115     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
   1116   { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
   1117   { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
   1118   { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
   1119   { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
   1120 };
   1121 const YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = {
   1122   { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
   1123     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
   1124   { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
   1125     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
   1126   { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
   1127     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
   1128   { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
   1129   { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
   1130   { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
   1131   { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
   1132 };
   1133 #endif
   1134 
   1135 #undef BB
   1136 #undef BG
   1137 #undef BR
   1138 #undef YGB
   1139 #undef UB
   1140 #undef UG
   1141 #undef VG
   1142 #undef VR
   1143 #undef YG
   1144 
   1145 // BT.709 YUV to RGB reference
   1146 // *  R = Y                - V * -1.28033
   1147 // *  G = Y - U *  0.21482 - V *  0.38059
   1148 // *  B = Y - U * -2.12798
   1149 
   1150 // Y contribution to R,G,B.  Scale and bias.
   1151 #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */
   1152 #define YGB 32  /* 64 / 2 */
   1153 
   1154 // TODO(fbarchard): Find way to express 2.12 instead of 2.0.
   1155 // U and V contributions to R,G,B.
   1156 #define UB -128 /* max(-128, round(-2.12798 * 64)) */
   1157 #define UG 14 /* round(0.21482 * 64) */
   1158 #define VG 24 /* round(0.38059  * 64) */
   1159 #define VR -82 /* round(-1.28033 * 64) */
   1160 
   1161 // Bias values to round, and subtract 128 from U and V.
   1162 #define BB (UB * 128            + YGB)
   1163 #define BG (UG * 128 + VG * 128 + YGB)
   1164 #define BR            (VR * 128 + YGB)
   1165 
   1166 #if defined(__aarch64__)
   1167 const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
   1168   { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
   1169   { -UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR },
   1170   { UG, VG, UG, VG, UG, VG, UG, VG },
   1171   { UG, VG, UG, VG, UG, VG, UG, VG },
   1172   { BB, BG, BR, 0, 0, 0, 0, 0 },
   1173   { 0x0101 * YG, 0, 0, 0 }
   1174 };
   1175 const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
   1176   { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
   1177   { -VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB },
   1178   { VG, UG, VG, UG, VG, UG, VG, UG },
   1179   { VG, UG, VG, UG, VG, UG, VG, UG },
   1180   { BR, BG, BB, 0, 0, 0, 0, 0 },
   1181   { 0x0101 * YG, 0, 0, 0 }
   1182 };
   1183 #elif defined(__arm__)
   1184 const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
   1185   { -UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0 },
   1186   { UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0 },
   1187   { BB, BG, BR, 0, 0, 0, 0, 0 },
   1188   { 0x0101 * YG, 0, 0, 0 }
   1189 };
   1190 const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
   1191   { -VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0 },
   1192   { VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0 },
   1193   { BR, BG, BB, 0, 0, 0, 0, 0 },
   1194   { 0x0101 * YG, 0, 0, 0 }
   1195 };
   1196 #else
   1197 const YuvConstants SIMD_ALIGNED(kYuvH709Constants) = {
   1198   { UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0,
   1199     UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0 },
   1200   { UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG,
   1201     UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG },
   1202   { 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR,
   1203     0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR },
   1204   { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
   1205   { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
   1206   { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
   1207   { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
   1208 };
   1209 const YuvConstants SIMD_ALIGNED(kYvuH709Constants) = {
   1210   { VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0,
   1211     VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0 },
   1212   { VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
   1213     VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG },
   1214   { 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB,
   1215     0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB },
   1216   { BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR },
   1217   { BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG },
   1218   { BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB },
   1219   { YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG }
   1220 };
   1221 #endif
   1222 
   1223 #undef BB
   1224 #undef BG
   1225 #undef BR
   1226 #undef YGB
   1227 #undef UB
   1228 #undef UG
   1229 #undef VG
   1230 #undef VR
   1231 #undef YG
   1232 
   1233 // C reference code that mimics the YUV assembly.
   1234 static __inline void YuvPixel(uint8 y, uint8 u, uint8 v,
   1235                               uint8* b, uint8* g, uint8* r,
   1236                               const struct YuvConstants* yuvconstants) {
   1237 #if defined(__aarch64__)
   1238   int ub = -yuvconstants->kUVToRB[0];
   1239   int ug = yuvconstants->kUVToG[0];
   1240   int vg = yuvconstants->kUVToG[1];
   1241   int vr = -yuvconstants->kUVToRB[1];
   1242   int bb = yuvconstants->kUVBiasBGR[0];
   1243   int bg = yuvconstants->kUVBiasBGR[1];
   1244   int br = yuvconstants->kUVBiasBGR[2];
   1245   int yg = yuvconstants->kYToRgb[0] / 0x0101;
   1246 #elif defined(__arm__)
   1247   int ub = -yuvconstants->kUVToRB[0];
   1248   int ug = yuvconstants->kUVToG[0];
   1249   int vg = yuvconstants->kUVToG[4];
   1250   int vr = -yuvconstants->kUVToRB[4];
   1251   int bb = yuvconstants->kUVBiasBGR[0];
   1252   int bg = yuvconstants->kUVBiasBGR[1];
   1253   int br = yuvconstants->kUVBiasBGR[2];
   1254   int yg = yuvconstants->kYToRgb[0] / 0x0101;
   1255 #else
   1256   int ub = yuvconstants->kUVToB[0];
   1257   int ug = yuvconstants->kUVToG[0];
   1258   int vg = yuvconstants->kUVToG[1];
   1259   int vr = yuvconstants->kUVToR[1];
   1260   int bb = yuvconstants->kUVBiasB[0];
   1261   int bg = yuvconstants->kUVBiasG[0];
   1262   int br = yuvconstants->kUVBiasR[0];
   1263   int yg = yuvconstants->kYToRgb[0];
   1264 #endif
   1265 
   1266   uint32 y1 = (uint32)(y * 0x0101 * yg) >> 16;
   1267   *b = Clamp((int32)(-(u * ub         ) + y1 + bb) >> 6);
   1268   *g = Clamp((int32)(-(u * ug + v * vg) + y1 + bg) >> 6);
   1269   *r = Clamp((int32)(-(         v * vr) + y1 + br) >> 6);
   1270 }
   1271 
   1272 // Y contribution to R,G,B.  Scale and bias.
   1273 #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */
   1274 #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */
   1275 
   1276 // C reference code that mimics the YUV assembly.
   1277 static __inline void YPixel(uint8 y, uint8* b, uint8* g, uint8* r) {
   1278   uint32 y1 = (uint32)(y * 0x0101 * YG) >> 16;
   1279   *b = Clamp((int32)(y1 + YGB) >> 6);
   1280   *g = Clamp((int32)(y1 + YGB) >> 6);
   1281   *r = Clamp((int32)(y1 + YGB) >> 6);
   1282 }
   1283 
   1284 #undef YG
   1285 #undef YGB
   1286 
   1287 #if !defined(LIBYUV_DISABLE_NEON) && \
   1288     (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON))
   1289 // C mimic assembly.
   1290 // TODO(fbarchard): Remove subsampling from Neon.
   1291 void I444ToARGBRow_C(const uint8* src_y,
   1292                      const uint8* src_u,
   1293                      const uint8* src_v,
   1294                      uint8* rgb_buf,
   1295                      const struct YuvConstants* yuvconstants,
   1296                      int width) {
   1297   int x;
   1298   for (x = 0; x < width - 1; x += 2) {
   1299     uint8 u = (src_u[0] + src_u[1] + 1) >> 1;
   1300     uint8 v = (src_v[0] + src_v[1] + 1) >> 1;
   1301     YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2,
   1302              yuvconstants);
   1303     rgb_buf[3] = 255;
   1304     YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6,
   1305              yuvconstants);
   1306     rgb_buf[7] = 255;
   1307     src_y += 2;
   1308     src_u += 2;
   1309     src_v += 2;
   1310     rgb_buf += 8;  // Advance 2 pixels.
   1311   }
   1312   if (width & 1) {
   1313     YuvPixel(src_y[0], src_u[0], src_v[0],
   1314              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1315     rgb_buf[3] = 255;
   1316   }
   1317 }
   1318 #else
   1319 void I444ToARGBRow_C(const uint8* src_y,
   1320                      const uint8* src_u,
   1321                      const uint8* src_v,
   1322                      uint8* rgb_buf,
   1323                      const struct YuvConstants* yuvconstants,
   1324                      int width) {
   1325   int x;
   1326   for (x = 0; x < width; ++x) {
   1327     YuvPixel(src_y[0], src_u[0], src_v[0],
   1328              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1329     rgb_buf[3] = 255;
   1330     src_y += 1;
   1331     src_u += 1;
   1332     src_v += 1;
   1333     rgb_buf += 4;  // Advance 1 pixel.
   1334   }
   1335 }
   1336 #endif
   1337 
   1338 // Also used for 420
   1339 void I422ToARGBRow_C(const uint8* src_y,
   1340                      const uint8* src_u,
   1341                      const uint8* src_v,
   1342                      uint8* rgb_buf,
   1343                      const struct YuvConstants* yuvconstants,
   1344                      int width) {
   1345   int x;
   1346   for (x = 0; x < width - 1; x += 2) {
   1347     YuvPixel(src_y[0], src_u[0], src_v[0],
   1348              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1349     rgb_buf[3] = 255;
   1350     YuvPixel(src_y[1], src_u[0], src_v[0],
   1351              rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
   1352     rgb_buf[7] = 255;
   1353     src_y += 2;
   1354     src_u += 1;
   1355     src_v += 1;
   1356     rgb_buf += 8;  // Advance 2 pixels.
   1357   }
   1358   if (width & 1) {
   1359     YuvPixel(src_y[0], src_u[0], src_v[0],
   1360              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1361     rgb_buf[3] = 255;
   1362   }
   1363 }
   1364 
   1365 void I422AlphaToARGBRow_C(const uint8* src_y,
   1366                           const uint8* src_u,
   1367                           const uint8* src_v,
   1368                           const uint8* src_a,
   1369                           uint8* rgb_buf,
   1370                           const struct YuvConstants* yuvconstants,
   1371                           int width) {
   1372   int x;
   1373   for (x = 0; x < width - 1; x += 2) {
   1374     YuvPixel(src_y[0], src_u[0], src_v[0],
   1375              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1376     rgb_buf[3] = src_a[0];
   1377     YuvPixel(src_y[1], src_u[0], src_v[0],
   1378              rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
   1379     rgb_buf[7] = src_a[1];
   1380     src_y += 2;
   1381     src_u += 1;
   1382     src_v += 1;
   1383     src_a += 2;
   1384     rgb_buf += 8;  // Advance 2 pixels.
   1385   }
   1386   if (width & 1) {
   1387     YuvPixel(src_y[0], src_u[0], src_v[0],
   1388              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1389     rgb_buf[3] = src_a[0];
   1390   }
   1391 }
   1392 
   1393 void I422ToRGB24Row_C(const uint8* src_y,
   1394                       const uint8* src_u,
   1395                       const uint8* src_v,
   1396                       uint8* rgb_buf,
   1397                       const struct YuvConstants* yuvconstants,
   1398                       int width) {
   1399   int x;
   1400   for (x = 0; x < width - 1; x += 2) {
   1401     YuvPixel(src_y[0], src_u[0], src_v[0],
   1402              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1403     YuvPixel(src_y[1], src_u[0], src_v[0],
   1404              rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants);
   1405     src_y += 2;
   1406     src_u += 1;
   1407     src_v += 1;
   1408     rgb_buf += 6;  // Advance 2 pixels.
   1409   }
   1410   if (width & 1) {
   1411     YuvPixel(src_y[0], src_u[0], src_v[0],
   1412              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1413   }
   1414 }
   1415 
   1416 void I422ToARGB4444Row_C(const uint8* src_y,
   1417                          const uint8* src_u,
   1418                          const uint8* src_v,
   1419                          uint8* dst_argb4444,
   1420                          const struct YuvConstants* yuvconstants,
   1421                          int width) {
   1422   uint8 b0;
   1423   uint8 g0;
   1424   uint8 r0;
   1425   uint8 b1;
   1426   uint8 g1;
   1427   uint8 r1;
   1428   int x;
   1429   for (x = 0; x < width - 1; x += 2) {
   1430     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
   1431     YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
   1432     b0 = b0 >> 4;
   1433     g0 = g0 >> 4;
   1434     r0 = r0 >> 4;
   1435     b1 = b1 >> 4;
   1436     g1 = g1 >> 4;
   1437     r1 = r1 >> 4;
   1438     *(uint32*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
   1439         (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000;
   1440     src_y += 2;
   1441     src_u += 1;
   1442     src_v += 1;
   1443     dst_argb4444 += 4;  // Advance 2 pixels.
   1444   }
   1445   if (width & 1) {
   1446     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
   1447     b0 = b0 >> 4;
   1448     g0 = g0 >> 4;
   1449     r0 = r0 >> 4;
   1450     *(uint16*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) |
   1451         0xf000;
   1452   }
   1453 }
   1454 
   1455 void I422ToARGB1555Row_C(const uint8* src_y,
   1456                          const uint8* src_u,
   1457                          const uint8* src_v,
   1458                          uint8* dst_argb1555,
   1459                          const struct YuvConstants* yuvconstants,
   1460                          int width) {
   1461   uint8 b0;
   1462   uint8 g0;
   1463   uint8 r0;
   1464   uint8 b1;
   1465   uint8 g1;
   1466   uint8 r1;
   1467   int x;
   1468   for (x = 0; x < width - 1; x += 2) {
   1469     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
   1470     YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
   1471     b0 = b0 >> 3;
   1472     g0 = g0 >> 3;
   1473     r0 = r0 >> 3;
   1474     b1 = b1 >> 3;
   1475     g1 = g1 >> 3;
   1476     r1 = r1 >> 3;
   1477     *(uint32*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
   1478         (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000;
   1479     src_y += 2;
   1480     src_u += 1;
   1481     src_v += 1;
   1482     dst_argb1555 += 4;  // Advance 2 pixels.
   1483   }
   1484   if (width & 1) {
   1485     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
   1486     b0 = b0 >> 3;
   1487     g0 = g0 >> 3;
   1488     r0 = r0 >> 3;
   1489     *(uint16*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) |
   1490         0x8000;
   1491   }
   1492 }
   1493 
   1494 void I422ToRGB565Row_C(const uint8* src_y,
   1495                        const uint8* src_u,
   1496                        const uint8* src_v,
   1497                        uint8* dst_rgb565,
   1498                        const struct YuvConstants* yuvconstants,
   1499                        int width) {
   1500   uint8 b0;
   1501   uint8 g0;
   1502   uint8 r0;
   1503   uint8 b1;
   1504   uint8 g1;
   1505   uint8 r1;
   1506   int x;
   1507   for (x = 0; x < width - 1; x += 2) {
   1508     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
   1509     YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants);
   1510     b0 = b0 >> 3;
   1511     g0 = g0 >> 2;
   1512     r0 = r0 >> 3;
   1513     b1 = b1 >> 3;
   1514     g1 = g1 >> 2;
   1515     r1 = r1 >> 3;
   1516     *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
   1517         (b1 << 16) | (g1 << 21) | (r1 << 27);
   1518     src_y += 2;
   1519     src_u += 1;
   1520     src_v += 1;
   1521     dst_rgb565 += 4;  // Advance 2 pixels.
   1522   }
   1523   if (width & 1) {
   1524     YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants);
   1525     b0 = b0 >> 3;
   1526     g0 = g0 >> 2;
   1527     r0 = r0 >> 3;
   1528     *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
   1529   }
   1530 }
   1531 
   1532 void I411ToARGBRow_C(const uint8* src_y,
   1533                      const uint8* src_u,
   1534                      const uint8* src_v,
   1535                      uint8* rgb_buf,
   1536                      const struct YuvConstants* yuvconstants,
   1537                      int width) {
   1538   int x;
   1539   for (x = 0; x < width - 3; x += 4) {
   1540     YuvPixel(src_y[0], src_u[0], src_v[0],
   1541              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1542     rgb_buf[3] = 255;
   1543     YuvPixel(src_y[1], src_u[0], src_v[0],
   1544              rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
   1545     rgb_buf[7] = 255;
   1546     YuvPixel(src_y[2], src_u[0], src_v[0],
   1547              rgb_buf + 8, rgb_buf + 9, rgb_buf + 10, yuvconstants);
   1548     rgb_buf[11] = 255;
   1549     YuvPixel(src_y[3], src_u[0], src_v[0],
   1550              rgb_buf + 12, rgb_buf + 13, rgb_buf + 14, yuvconstants);
   1551     rgb_buf[15] = 255;
   1552     src_y += 4;
   1553     src_u += 1;
   1554     src_v += 1;
   1555     rgb_buf += 16;  // Advance 4 pixels.
   1556   }
   1557   if (width & 2) {
   1558     YuvPixel(src_y[0], src_u[0], src_v[0],
   1559              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1560     rgb_buf[3] = 255;
   1561     YuvPixel(src_y[1], src_u[0], src_v[0],
   1562              rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
   1563     rgb_buf[7] = 255;
   1564     src_y += 2;
   1565     rgb_buf += 8;  // Advance 2 pixels.
   1566   }
   1567   if (width & 1) {
   1568     YuvPixel(src_y[0], src_u[0], src_v[0],
   1569              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1570     rgb_buf[3] = 255;
   1571   }
   1572 }
   1573 
   1574 void NV12ToARGBRow_C(const uint8* src_y,
   1575                      const uint8* src_uv,
   1576                      uint8* rgb_buf,
   1577                      const struct YuvConstants* yuvconstants,
   1578                      int width) {
   1579   int x;
   1580   for (x = 0; x < width - 1; x += 2) {
   1581     YuvPixel(src_y[0], src_uv[0], src_uv[1],
   1582              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1583     rgb_buf[3] = 255;
   1584     YuvPixel(src_y[1], src_uv[0], src_uv[1],
   1585              rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
   1586     rgb_buf[7] = 255;
   1587     src_y += 2;
   1588     src_uv += 2;
   1589     rgb_buf += 8;  // Advance 2 pixels.
   1590   }
   1591   if (width & 1) {
   1592     YuvPixel(src_y[0], src_uv[0], src_uv[1],
   1593              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1594     rgb_buf[3] = 255;
   1595   }
   1596 }
   1597 
   1598 void NV21ToARGBRow_C(const uint8* src_y,
   1599                      const uint8* src_vu,
   1600                      uint8* rgb_buf,
   1601                      const struct YuvConstants* yuvconstants,
   1602                      int width) {
   1603   int x;
   1604   for (x = 0; x < width - 1; x += 2) {
   1605     YuvPixel(src_y[0], src_vu[1], src_vu[0],
   1606              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1607     rgb_buf[3] = 255;
   1608     YuvPixel(src_y[1], src_vu[1], src_vu[0],
   1609              rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
   1610     rgb_buf[7] = 255;
   1611     src_y += 2;
   1612     src_vu += 2;
   1613     rgb_buf += 8;  // Advance 2 pixels.
   1614   }
   1615   if (width & 1) {
   1616     YuvPixel(src_y[0], src_vu[1], src_vu[0],
   1617              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1618     rgb_buf[3] = 255;
   1619   }
   1620 }
   1621 
   1622 void NV12ToRGB565Row_C(const uint8* src_y,
   1623                        const uint8* src_uv,
   1624                        uint8* dst_rgb565,
   1625                        const struct YuvConstants* yuvconstants,
   1626                        int width) {
   1627   uint8 b0;
   1628   uint8 g0;
   1629   uint8 r0;
   1630   uint8 b1;
   1631   uint8 g1;
   1632   uint8 r1;
   1633   int x;
   1634   for (x = 0; x < width - 1; x += 2) {
   1635     YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
   1636     YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants);
   1637     b0 = b0 >> 3;
   1638     g0 = g0 >> 2;
   1639     r0 = r0 >> 3;
   1640     b1 = b1 >> 3;
   1641     g1 = g1 >> 2;
   1642     r1 = r1 >> 3;
   1643     *(uint32*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) |
   1644         (b1 << 16) | (g1 << 21) | (r1 << 27);
   1645     src_y += 2;
   1646     src_uv += 2;
   1647     dst_rgb565 += 4;  // Advance 2 pixels.
   1648   }
   1649   if (width & 1) {
   1650     YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants);
   1651     b0 = b0 >> 3;
   1652     g0 = g0 >> 2;
   1653     r0 = r0 >> 3;
   1654     *(uint16*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11);
   1655   }
   1656 }
   1657 
   1658 void YUY2ToARGBRow_C(const uint8* src_yuy2,
   1659                      uint8* rgb_buf,
   1660                      const struct YuvConstants* yuvconstants,
   1661                      int width) {
   1662   int x;
   1663   for (x = 0; x < width - 1; x += 2) {
   1664     YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
   1665              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1666     rgb_buf[3] = 255;
   1667     YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3],
   1668              rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
   1669     rgb_buf[7] = 255;
   1670     src_yuy2 += 4;
   1671     rgb_buf += 8;  // Advance 2 pixels.
   1672   }
   1673   if (width & 1) {
   1674     YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3],
   1675              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1676     rgb_buf[3] = 255;
   1677   }
   1678 }
   1679 
   1680 void UYVYToARGBRow_C(const uint8* src_uyvy,
   1681                      uint8* rgb_buf,
   1682                      const struct YuvConstants* yuvconstants,
   1683                      int width) {
   1684   int x;
   1685   for (x = 0; x < width - 1; x += 2) {
   1686     YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
   1687              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1688     rgb_buf[3] = 255;
   1689     YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2],
   1690              rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants);
   1691     rgb_buf[7] = 255;
   1692     src_uyvy += 4;
   1693     rgb_buf += 8;  // Advance 2 pixels.
   1694   }
   1695   if (width & 1) {
   1696     YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2],
   1697              rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants);
   1698     rgb_buf[3] = 255;
   1699   }
   1700 }
   1701 
   1702 void I422ToRGBARow_C(const uint8* src_y,
   1703                      const uint8* src_u,
   1704                      const uint8* src_v,
   1705                      uint8* rgb_buf,
   1706                      const struct YuvConstants* yuvconstants,
   1707                      int width) {
   1708   int x;
   1709   for (x = 0; x < width - 1; x += 2) {
   1710     YuvPixel(src_y[0], src_u[0], src_v[0],
   1711              rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants);
   1712     rgb_buf[0] = 255;
   1713     YuvPixel(src_y[1], src_u[0], src_v[0],
   1714              rgb_buf + 5, rgb_buf + 6, rgb_buf + 7, yuvconstants);
   1715     rgb_buf[4] = 255;
   1716     src_y += 2;
   1717     src_u += 1;
   1718     src_v += 1;
   1719     rgb_buf += 8;  // Advance 2 pixels.
   1720   }
   1721   if (width & 1) {
   1722     YuvPixel(src_y[0], src_u[0], src_v[0],
   1723              rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants);
   1724     rgb_buf[0] = 255;
   1725   }
   1726 }
   1727 
   1728 void I400ToARGBRow_C(const uint8* src_y, uint8* rgb_buf, int width) {
   1729   int x;
   1730   for (x = 0; x < width - 1; x += 2) {
   1731     YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
   1732     rgb_buf[3] = 255;
   1733     YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6);
   1734     rgb_buf[7] = 255;
   1735     src_y += 2;
   1736     rgb_buf += 8;  // Advance 2 pixels.
   1737   }
   1738   if (width & 1) {
   1739     YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2);
   1740     rgb_buf[3] = 255;
   1741   }
   1742 }
   1743 
   1744 void MirrorRow_C(const uint8* src, uint8* dst, int width) {
   1745   int x;
   1746   src += width - 1;
   1747   for (x = 0; x < width - 1; x += 2) {
   1748     dst[x] = src[0];
   1749     dst[x + 1] = src[-1];
   1750     src -= 2;
   1751   }
   1752   if (width & 1) {
   1753     dst[width - 1] = src[0];
   1754   }
   1755 }
   1756 
   1757 void MirrorUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
   1758   int x;
   1759   src_uv += (width - 1) << 1;
   1760   for (x = 0; x < width - 1; x += 2) {
   1761     dst_u[x] = src_uv[0];
   1762     dst_u[x + 1] = src_uv[-2];
   1763     dst_v[x] = src_uv[1];
   1764     dst_v[x + 1] = src_uv[-2 + 1];
   1765     src_uv -= 4;
   1766   }
   1767   if (width & 1) {
   1768     dst_u[width - 1] = src_uv[0];
   1769     dst_v[width - 1] = src_uv[1];
   1770   }
   1771 }
   1772 
   1773 void ARGBMirrorRow_C(const uint8* src, uint8* dst, int width) {
   1774   int x;
   1775   const uint32* src32 = (const uint32*)(src);
   1776   uint32* dst32 = (uint32*)(dst);
   1777   src32 += width - 1;
   1778   for (x = 0; x < width - 1; x += 2) {
   1779     dst32[x] = src32[0];
   1780     dst32[x + 1] = src32[-1];
   1781     src32 -= 2;
   1782   }
   1783   if (width & 1) {
   1784     dst32[width - 1] = src32[0];
   1785   }
   1786 }
   1787 
   1788 void SplitUVRow_C(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int width) {
   1789   int x;
   1790   for (x = 0; x < width - 1; x += 2) {
   1791     dst_u[x] = src_uv[0];
   1792     dst_u[x + 1] = src_uv[2];
   1793     dst_v[x] = src_uv[1];
   1794     dst_v[x + 1] = src_uv[3];
   1795     src_uv += 4;
   1796   }
   1797   if (width & 1) {
   1798     dst_u[width - 1] = src_uv[0];
   1799     dst_v[width - 1] = src_uv[1];
   1800   }
   1801 }
   1802 
   1803 void MergeUVRow_C(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
   1804                   int width) {
   1805   int x;
   1806   for (x = 0; x < width - 1; x += 2) {
   1807     dst_uv[0] = src_u[x];
   1808     dst_uv[1] = src_v[x];
   1809     dst_uv[2] = src_u[x + 1];
   1810     dst_uv[3] = src_v[x + 1];
   1811     dst_uv += 4;
   1812   }
   1813   if (width & 1) {
   1814     dst_uv[0] = src_u[width - 1];
   1815     dst_uv[1] = src_v[width - 1];
   1816   }
   1817 }
   1818 
   1819 void CopyRow_C(const uint8* src, uint8* dst, int count) {
   1820   memcpy(dst, src, count);
   1821 }
   1822 
   1823 void CopyRow_16_C(const uint16* src, uint16* dst, int count) {
   1824   memcpy(dst, src, count * 2);
   1825 }
   1826 
   1827 void SetRow_C(uint8* dst, uint8 v8, int width) {
   1828   memset(dst, v8, width);
   1829 }
   1830 
   1831 void ARGBSetRow_C(uint8* dst_argb, uint32 v32, int width) {
   1832   uint32* d = (uint32*)(dst_argb);
   1833   int x;
   1834   for (x = 0; x < width; ++x) {
   1835     d[x] = v32;
   1836   }
   1837 }
   1838 
   1839 // Filter 2 rows of YUY2 UV's (422) into U and V (420).
   1840 void YUY2ToUVRow_C(const uint8* src_yuy2, int src_stride_yuy2,
   1841                    uint8* dst_u, uint8* dst_v, int width) {
   1842   // Output a row of UV values, filtering 2 rows of YUY2.
   1843   int x;
   1844   for (x = 0; x < width; x += 2) {
   1845     dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1;
   1846     dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1;
   1847     src_yuy2 += 4;
   1848     dst_u += 1;
   1849     dst_v += 1;
   1850   }
   1851 }
   1852 
   1853 // Copy row of YUY2 UV's (422) into U and V (422).
   1854 void YUY2ToUV422Row_C(const uint8* src_yuy2,
   1855                       uint8* dst_u, uint8* dst_v, int width) {
   1856   // Output a row of UV values.
   1857   int x;
   1858   for (x = 0; x < width; x += 2) {
   1859     dst_u[0] = src_yuy2[1];
   1860     dst_v[0] = src_yuy2[3];
   1861     src_yuy2 += 4;
   1862     dst_u += 1;
   1863     dst_v += 1;
   1864   }
   1865 }
   1866 
   1867 // Copy row of YUY2 Y's (422) into Y (420/422).
   1868 void YUY2ToYRow_C(const uint8* src_yuy2, uint8* dst_y, int width) {
   1869   // Output a row of Y values.
   1870   int x;
   1871   for (x = 0; x < width - 1; x += 2) {
   1872     dst_y[x] = src_yuy2[0];
   1873     dst_y[x + 1] = src_yuy2[2];
   1874     src_yuy2 += 4;
   1875   }
   1876   if (width & 1) {
   1877     dst_y[width - 1] = src_yuy2[0];
   1878   }
   1879 }
   1880 
   1881 // Filter 2 rows of UYVY UV's (422) into U and V (420).
   1882 void UYVYToUVRow_C(const uint8* src_uyvy, int src_stride_uyvy,
   1883                    uint8* dst_u, uint8* dst_v, int width) {
   1884   // Output a row of UV values.
   1885   int x;
   1886   for (x = 0; x < width; x += 2) {
   1887     dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1;
   1888     dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1;
   1889     src_uyvy += 4;
   1890     dst_u += 1;
   1891     dst_v += 1;
   1892   }
   1893 }
   1894 
   1895 // Copy row of UYVY UV's (422) into U and V (422).
   1896 void UYVYToUV422Row_C(const uint8* src_uyvy,
   1897                       uint8* dst_u, uint8* dst_v, int width) {
   1898   // Output a row of UV values.
   1899   int x;
   1900   for (x = 0; x < width; x += 2) {
   1901     dst_u[0] = src_uyvy[0];
   1902     dst_v[0] = src_uyvy[2];
   1903     src_uyvy += 4;
   1904     dst_u += 1;
   1905     dst_v += 1;
   1906   }
   1907 }
   1908 
   1909 // Copy row of UYVY Y's (422) into Y (420/422).
   1910 void UYVYToYRow_C(const uint8* src_uyvy, uint8* dst_y, int width) {
   1911   // Output a row of Y values.
   1912   int x;
   1913   for (x = 0; x < width - 1; x += 2) {
   1914     dst_y[x] = src_uyvy[1];
   1915     dst_y[x + 1] = src_uyvy[3];
   1916     src_uyvy += 4;
   1917   }
   1918   if (width & 1) {
   1919     dst_y[width - 1] = src_uyvy[1];
   1920   }
   1921 }
   1922 
   1923 #define BLEND(f, b, a) (((256 - a) * b) >> 8) + f
   1924 
   1925 // Blend src_argb0 over src_argb1 and store to dst_argb.
   1926 // dst_argb may be src_argb0 or src_argb1.
   1927 // This code mimics the SSSE3 version for better testability.
   1928 void ARGBBlendRow_C(const uint8* src_argb0, const uint8* src_argb1,
   1929                     uint8* dst_argb, int width) {
   1930   int x;
   1931   for (x = 0; x < width - 1; x += 2) {
   1932     uint32 fb = src_argb0[0];
   1933     uint32 fg = src_argb0[1];
   1934     uint32 fr = src_argb0[2];
   1935     uint32 a = src_argb0[3];
   1936     uint32 bb = src_argb1[0];
   1937     uint32 bg = src_argb1[1];
   1938     uint32 br = src_argb1[2];
   1939     dst_argb[0] = BLEND(fb, bb, a);
   1940     dst_argb[1] = BLEND(fg, bg, a);
   1941     dst_argb[2] = BLEND(fr, br, a);
   1942     dst_argb[3] = 255u;
   1943 
   1944     fb = src_argb0[4 + 0];
   1945     fg = src_argb0[4 + 1];
   1946     fr = src_argb0[4 + 2];
   1947     a = src_argb0[4 + 3];
   1948     bb = src_argb1[4 + 0];
   1949     bg = src_argb1[4 + 1];
   1950     br = src_argb1[4 + 2];
   1951     dst_argb[4 + 0] = BLEND(fb, bb, a);
   1952     dst_argb[4 + 1] = BLEND(fg, bg, a);
   1953     dst_argb[4 + 2] = BLEND(fr, br, a);
   1954     dst_argb[4 + 3] = 255u;
   1955     src_argb0 += 8;
   1956     src_argb1 += 8;
   1957     dst_argb += 8;
   1958   }
   1959 
   1960   if (width & 1) {
   1961     uint32 fb = src_argb0[0];
   1962     uint32 fg = src_argb0[1];
   1963     uint32 fr = src_argb0[2];
   1964     uint32 a = src_argb0[3];
   1965     uint32 bb = src_argb1[0];
   1966     uint32 bg = src_argb1[1];
   1967     uint32 br = src_argb1[2];
   1968     dst_argb[0] = BLEND(fb, bb, a);
   1969     dst_argb[1] = BLEND(fg, bg, a);
   1970     dst_argb[2] = BLEND(fr, br, a);
   1971     dst_argb[3] = 255u;
   1972   }
   1973 }
   1974 #undef BLEND
   1975 
   1976 #define UBLEND(f, b, a) (((a) * f) + ((255 - a) * b) + 255) >> 8
   1977 void BlendPlaneRow_C(const uint8* src0, const uint8* src1,
   1978                      const uint8* alpha, uint8* dst, int width) {
   1979   int x;
   1980   for (x = 0; x < width - 1; x += 2) {
   1981     dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
   1982     dst[1] = UBLEND(src0[1], src1[1], alpha[1]);
   1983     src0 += 2;
   1984     src1 += 2;
   1985     alpha += 2;
   1986     dst += 2;
   1987   }
   1988   if (width & 1) {
   1989     dst[0] = UBLEND(src0[0], src1[0], alpha[0]);
   1990   }
   1991 }
   1992 #undef UBLEND
   1993 
   1994 #define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24
   1995 
   1996 // Multiply source RGB by alpha and store to destination.
   1997 // This code mimics the SSSE3 version for better testability.
   1998 void ARGBAttenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
   1999   int i;
   2000   for (i = 0; i < width - 1; i += 2) {
   2001     uint32 b = src_argb[0];
   2002     uint32 g = src_argb[1];
   2003     uint32 r = src_argb[2];
   2004     uint32 a = src_argb[3];
   2005     dst_argb[0] = ATTENUATE(b, a);
   2006     dst_argb[1] = ATTENUATE(g, a);
   2007     dst_argb[2] = ATTENUATE(r, a);
   2008     dst_argb[3] = a;
   2009     b = src_argb[4];
   2010     g = src_argb[5];
   2011     r = src_argb[6];
   2012     a = src_argb[7];
   2013     dst_argb[4] = ATTENUATE(b, a);
   2014     dst_argb[5] = ATTENUATE(g, a);
   2015     dst_argb[6] = ATTENUATE(r, a);
   2016     dst_argb[7] = a;
   2017     src_argb += 8;
   2018     dst_argb += 8;
   2019   }
   2020 
   2021   if (width & 1) {
   2022     const uint32 b = src_argb[0];
   2023     const uint32 g = src_argb[1];
   2024     const uint32 r = src_argb[2];
   2025     const uint32 a = src_argb[3];
   2026     dst_argb[0] = ATTENUATE(b, a);
   2027     dst_argb[1] = ATTENUATE(g, a);
   2028     dst_argb[2] = ATTENUATE(r, a);
   2029     dst_argb[3] = a;
   2030   }
   2031 }
   2032 #undef ATTENUATE
   2033 
   2034 // Divide source RGB by alpha and store to destination.
   2035 // b = (b * 255 + (a / 2)) / a;
   2036 // g = (g * 255 + (a / 2)) / a;
   2037 // r = (r * 255 + (a / 2)) / a;
   2038 // Reciprocal method is off by 1 on some values. ie 125
   2039 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower.
   2040 #define T(a) 0x01000000 + (0x10000 / a)
   2041 const uint32 fixed_invtbl8[256] = {
   2042   0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07),
   2043   T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f),
   2044   T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17),
   2045   T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f),
   2046   T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27),
   2047   T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f),
   2048   T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37),
   2049   T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f),
   2050   T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47),
   2051   T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f),
   2052   T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57),
   2053   T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f),
   2054   T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67),
   2055   T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f),
   2056   T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77),
   2057   T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f),
   2058   T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87),
   2059   T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f),
   2060   T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97),
   2061   T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f),
   2062   T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7),
   2063   T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf),
   2064   T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7),
   2065   T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf),
   2066   T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7),
   2067   T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf),
   2068   T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7),
   2069   T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf),
   2070   T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7),
   2071   T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef),
   2072   T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7),
   2073   T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100 };
   2074 #undef T
   2075 
   2076 void ARGBUnattenuateRow_C(const uint8* src_argb, uint8* dst_argb, int width) {
   2077   int i;
   2078   for (i = 0; i < width; ++i) {
   2079     uint32 b = src_argb[0];
   2080     uint32 g = src_argb[1];
   2081     uint32 r = src_argb[2];
   2082     const uint32 a = src_argb[3];
   2083     const uint32 ia = fixed_invtbl8[a] & 0xffff;  // 8.8 fixed point
   2084     b = (b * ia) >> 8;
   2085     g = (g * ia) >> 8;
   2086     r = (r * ia) >> 8;
   2087     // Clamping should not be necessary but is free in assembly.
   2088     dst_argb[0] = clamp255(b);
   2089     dst_argb[1] = clamp255(g);
   2090     dst_argb[2] = clamp255(r);
   2091     dst_argb[3] = a;
   2092     src_argb += 4;
   2093     dst_argb += 4;
   2094   }
   2095 }
   2096 
   2097 void ComputeCumulativeSumRow_C(const uint8* row, int32* cumsum,
   2098                                const int32* previous_cumsum, int width) {
   2099   int32 row_sum[4] = {0, 0, 0, 0};
   2100   int x;
   2101   for (x = 0; x < width; ++x) {
   2102     row_sum[0] += row[x * 4 + 0];
   2103     row_sum[1] += row[x * 4 + 1];
   2104     row_sum[2] += row[x * 4 + 2];
   2105     row_sum[3] += row[x * 4 + 3];
   2106     cumsum[x * 4 + 0] = row_sum[0]  + previous_cumsum[x * 4 + 0];
   2107     cumsum[x * 4 + 1] = row_sum[1]  + previous_cumsum[x * 4 + 1];
   2108     cumsum[x * 4 + 2] = row_sum[2]  + previous_cumsum[x * 4 + 2];
   2109     cumsum[x * 4 + 3] = row_sum[3]  + previous_cumsum[x * 4 + 3];
   2110   }
   2111 }
   2112 
   2113 void CumulativeSumToAverageRow_C(const int32* tl, const int32* bl,
   2114                                 int w, int area, uint8* dst, int count) {
   2115   float ooa = 1.0f / area;
   2116   int i;
   2117   for (i = 0; i < count; ++i) {
   2118     dst[0] = (uint8)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa);
   2119     dst[1] = (uint8)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa);
   2120     dst[2] = (uint8)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa);
   2121     dst[3] = (uint8)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa);
   2122     dst += 4;
   2123     tl += 4;
   2124     bl += 4;
   2125   }
   2126 }
   2127 
   2128 // Copy pixels from rotated source to destination row with a slope.
   2129 LIBYUV_API
   2130 void ARGBAffineRow_C(const uint8* src_argb, int src_argb_stride,
   2131                      uint8* dst_argb, const float* uv_dudv, int width) {
   2132   int i;
   2133   // Render a row of pixels from source into a buffer.
   2134   float uv[2];
   2135   uv[0] = uv_dudv[0];
   2136   uv[1] = uv_dudv[1];
   2137   for (i = 0; i < width; ++i) {
   2138     int x = (int)(uv[0]);
   2139     int y = (int)(uv[1]);
   2140     *(uint32*)(dst_argb) =
   2141         *(const uint32*)(src_argb + y * src_argb_stride +
   2142                                          x * 4);
   2143     dst_argb += 4;
   2144     uv[0] += uv_dudv[2];
   2145     uv[1] += uv_dudv[3];
   2146   }
   2147 }
   2148 
   2149 // Blend 2 rows into 1.
   2150 static void HalfRow_C(const uint8* src_uv, ptrdiff_t src_uv_stride,
   2151                       uint8* dst_uv, int width) {
   2152   int x;
   2153   for (x = 0; x < width; ++x) {
   2154     dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
   2155   }
   2156 }
   2157 
   2158 static void HalfRow_16_C(const uint16* src_uv, ptrdiff_t src_uv_stride,
   2159                          uint16* dst_uv, int width) {
   2160   int x;
   2161   for (x = 0; x < width; ++x) {
   2162     dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1;
   2163   }
   2164 }
   2165 
   2166 // C version 2x2 -> 2x1.
   2167 void InterpolateRow_C(uint8* dst_ptr, const uint8* src_ptr,
   2168                       ptrdiff_t src_stride,
   2169                       int width, int source_y_fraction) {
   2170   int y1_fraction = source_y_fraction ;
   2171   int y0_fraction = 256 - y1_fraction;
   2172   const uint8* src_ptr1 = src_ptr + src_stride;
   2173   int x;
   2174   if (y1_fraction == 0) {
   2175     memcpy(dst_ptr, src_ptr, width);
   2176     return;
   2177   }
   2178   if (y1_fraction == 128) {
   2179     HalfRow_C(src_ptr, src_stride, dst_ptr, width);
   2180     return;
   2181   }
   2182   for (x = 0; x < width - 1; x += 2) {
   2183     dst_ptr[0] =
   2184         (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
   2185     dst_ptr[1] =
   2186         (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
   2187     src_ptr += 2;
   2188     src_ptr1 += 2;
   2189     dst_ptr += 2;
   2190   }
   2191   if (width & 1) {
   2192     dst_ptr[0] =
   2193         (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
   2194   }
   2195 }
   2196 
   2197 void InterpolateRow_16_C(uint16* dst_ptr, const uint16* src_ptr,
   2198                          ptrdiff_t src_stride,
   2199                          int width, int source_y_fraction) {
   2200   int y1_fraction = source_y_fraction;
   2201   int y0_fraction = 256 - y1_fraction;
   2202   const uint16* src_ptr1 = src_ptr + src_stride;
   2203   int x;
   2204   if (source_y_fraction == 0) {
   2205     memcpy(dst_ptr, src_ptr, width * 2);
   2206     return;
   2207   }
   2208   if (source_y_fraction == 128) {
   2209     HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
   2210     return;
   2211   }
   2212   for (x = 0; x < width - 1; x += 2) {
   2213     dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
   2214     dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
   2215     src_ptr += 2;
   2216     src_ptr1 += 2;
   2217     dst_ptr += 2;
   2218   }
   2219   if (width & 1) {
   2220     dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
   2221   }
   2222 }
   2223 
   2224 // Use first 4 shuffler values to reorder ARGB channels.
   2225 void ARGBShuffleRow_C(const uint8* src_argb, uint8* dst_argb,
   2226                       const uint8* shuffler, int width) {
   2227   int index0 = shuffler[0];
   2228   int index1 = shuffler[1];
   2229   int index2 = shuffler[2];
   2230   int index3 = shuffler[3];
   2231   // Shuffle a row of ARGB.
   2232   int x;
   2233   for (x = 0; x < width; ++x) {
   2234     // To support in-place conversion.
   2235     uint8 b = src_argb[index0];
   2236     uint8 g = src_argb[index1];
   2237     uint8 r = src_argb[index2];
   2238     uint8 a = src_argb[index3];
   2239     dst_argb[0] = b;
   2240     dst_argb[1] = g;
   2241     dst_argb[2] = r;
   2242     dst_argb[3] = a;
   2243     src_argb += 4;
   2244     dst_argb += 4;
   2245   }
   2246 }
   2247 
   2248 void I422ToYUY2Row_C(const uint8* src_y,
   2249                      const uint8* src_u,
   2250                      const uint8* src_v,
   2251                      uint8* dst_frame, int width) {
   2252   int x;
   2253   for (x = 0; x < width - 1; x += 2) {
   2254     dst_frame[0] = src_y[0];
   2255     dst_frame[1] = src_u[0];
   2256     dst_frame[2] = src_y[1];
   2257     dst_frame[3] = src_v[0];
   2258     dst_frame += 4;
   2259     src_y += 2;
   2260     src_u += 1;
   2261     src_v += 1;
   2262   }
   2263   if (width & 1) {
   2264     dst_frame[0] = src_y[0];
   2265     dst_frame[1] = src_u[0];
   2266     dst_frame[2] = 0;
   2267     dst_frame[3] = src_v[0];
   2268   }
   2269 }
   2270 
   2271 void I422ToUYVYRow_C(const uint8* src_y,
   2272                      const uint8* src_u,
   2273                      const uint8* src_v,
   2274                      uint8* dst_frame, int width) {
   2275   int x;
   2276   for (x = 0; x < width - 1; x += 2) {
   2277     dst_frame[0] = src_u[0];
   2278     dst_frame[1] = src_y[0];
   2279     dst_frame[2] = src_v[0];
   2280     dst_frame[3] = src_y[1];
   2281     dst_frame += 4;
   2282     src_y += 2;
   2283     src_u += 1;
   2284     src_v += 1;
   2285   }
   2286   if (width & 1) {
   2287     dst_frame[0] = src_u[0];
   2288     dst_frame[1] = src_y[0];
   2289     dst_frame[2] = src_v[0];
   2290     dst_frame[3] = 0;
   2291   }
   2292 }
   2293 
   2294 
   2295 void ARGBPolynomialRow_C(const uint8* src_argb,
   2296                          uint8* dst_argb,
   2297                          const float* poly,
   2298                          int width) {
   2299   int i;
   2300   for (i = 0; i < width; ++i) {
   2301     float b = (float)(src_argb[0]);
   2302     float g = (float)(src_argb[1]);
   2303     float r = (float)(src_argb[2]);
   2304     float a = (float)(src_argb[3]);
   2305     float b2 = b * b;
   2306     float g2 = g * g;
   2307     float r2 = r * r;
   2308     float a2 = a * a;
   2309     float db = poly[0] + poly[4] * b;
   2310     float dg = poly[1] + poly[5] * g;
   2311     float dr = poly[2] + poly[6] * r;
   2312     float da = poly[3] + poly[7] * a;
   2313     float b3 = b2 * b;
   2314     float g3 = g2 * g;
   2315     float r3 = r2 * r;
   2316     float a3 = a2 * a;
   2317     db += poly[8] * b2;
   2318     dg += poly[9] * g2;
   2319     dr += poly[10] * r2;
   2320     da += poly[11] * a2;
   2321     db += poly[12] * b3;
   2322     dg += poly[13] * g3;
   2323     dr += poly[14] * r3;
   2324     da += poly[15] * a3;
   2325 
   2326     dst_argb[0] = Clamp((int32)(db));
   2327     dst_argb[1] = Clamp((int32)(dg));
   2328     dst_argb[2] = Clamp((int32)(dr));
   2329     dst_argb[3] = Clamp((int32)(da));
   2330     src_argb += 4;
   2331     dst_argb += 4;
   2332   }
   2333 }
   2334 
   2335 void ARGBLumaColorTableRow_C(const uint8* src_argb, uint8* dst_argb, int width,
   2336                              const uint8* luma, uint32 lumacoeff) {
   2337   uint32 bc = lumacoeff & 0xff;
   2338   uint32 gc = (lumacoeff >> 8) & 0xff;
   2339   uint32 rc = (lumacoeff >> 16) & 0xff;
   2340 
   2341   int i;
   2342   for (i = 0; i < width - 1; i += 2) {
   2343     // Luminance in rows, color values in columns.
   2344     const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
   2345                            src_argb[2] * rc) & 0x7F00u) + luma;
   2346     const uint8* luma1;
   2347     dst_argb[0] = luma0[src_argb[0]];
   2348     dst_argb[1] = luma0[src_argb[1]];
   2349     dst_argb[2] = luma0[src_argb[2]];
   2350     dst_argb[3] = src_argb[3];
   2351     luma1 = ((src_argb[4] * bc + src_argb[5] * gc +
   2352               src_argb[6] * rc) & 0x7F00u) + luma;
   2353     dst_argb[4] = luma1[src_argb[4]];
   2354     dst_argb[5] = luma1[src_argb[5]];
   2355     dst_argb[6] = luma1[src_argb[6]];
   2356     dst_argb[7] = src_argb[7];
   2357     src_argb += 8;
   2358     dst_argb += 8;
   2359   }
   2360   if (width & 1) {
   2361     // Luminance in rows, color values in columns.
   2362     const uint8* luma0 = ((src_argb[0] * bc + src_argb[1] * gc +
   2363                            src_argb[2] * rc) & 0x7F00u) + luma;
   2364     dst_argb[0] = luma0[src_argb[0]];
   2365     dst_argb[1] = luma0[src_argb[1]];
   2366     dst_argb[2] = luma0[src_argb[2]];
   2367     dst_argb[3] = src_argb[3];
   2368   }
   2369 }
   2370 
   2371 void ARGBCopyAlphaRow_C(const uint8* src, uint8* dst, int width) {
   2372   int i;
   2373   for (i = 0; i < width - 1; i += 2) {
   2374     dst[3] = src[3];
   2375     dst[7] = src[7];
   2376     dst += 8;
   2377     src += 8;
   2378   }
   2379   if (width & 1) {
   2380     dst[3] = src[3];
   2381   }
   2382 }
   2383 
   2384 void ARGBExtractAlphaRow_C(const uint8* src_argb, uint8* dst_a, int width) {
   2385   int i;
   2386   for (i = 0; i < width - 1; i += 2) {
   2387     dst_a[0] = src_argb[3];
   2388     dst_a[1] = src_argb[7];
   2389     dst_a += 2;
   2390     src_argb += 8;
   2391   }
   2392   if (width & 1) {
   2393     dst_a[0] = src_argb[3];
   2394   }
   2395 }
   2396 
   2397 void ARGBCopyYToAlphaRow_C(const uint8* src, uint8* dst, int width) {
   2398   int i;
   2399   for (i = 0; i < width - 1; i += 2) {
   2400     dst[3] = src[0];
   2401     dst[7] = src[1];
   2402     dst += 8;
   2403     src += 2;
   2404   }
   2405   if (width & 1) {
   2406     dst[3] = src[0];
   2407   }
   2408 }
   2409 
// Maximum temporary width for wrappers to process at a time, in pixels.
// The two-step wrappers below stage intermediate pixels in a local buffer of
// MAXTWIDTH * 4 bytes and loop over wider rows in chunks of at most this many
// pixels.
#define MAXTWIDTH 2048
   2412 
   2413 #if !(defined(_MSC_VER) && defined(_M_IX86)) && \
   2414     defined(HAS_I422TORGB565ROW_SSSE3)
   2415 // row_win.cc has asm version, but GCC uses 2 step wrapper.
   2416 void I422ToRGB565Row_SSSE3(const uint8* src_y,
   2417                            const uint8* src_u,
   2418                            const uint8* src_v,
   2419                            uint8* dst_rgb565,
   2420                            const struct YuvConstants* yuvconstants,
   2421                            int width) {
   2422   SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
   2423   while (width > 0) {
   2424     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
   2425     I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
   2426     ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
   2427     src_y += twidth;
   2428     src_u += twidth / 2;
   2429     src_v += twidth / 2;
   2430     dst_rgb565 += twidth * 2;
   2431     width -= twidth;
   2432   }
   2433 }
   2434 #endif
   2435 
   2436 #if defined(HAS_I422TOARGB1555ROW_SSSE3)
   2437 void I422ToARGB1555Row_SSSE3(const uint8* src_y,
   2438                              const uint8* src_u,
   2439                              const uint8* src_v,
   2440                              uint8* dst_argb1555,
   2441                              const struct YuvConstants* yuvconstants,
   2442                              int width) {
   2443   // Row buffer for intermediate ARGB pixels.
   2444   SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
   2445   while (width > 0) {
   2446     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
   2447     I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
   2448     ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
   2449     src_y += twidth;
   2450     src_u += twidth / 2;
   2451     src_v += twidth / 2;
   2452     dst_argb1555 += twidth * 2;
   2453     width -= twidth;
   2454   }
   2455 }
   2456 #endif
   2457 
   2458 #if defined(HAS_I422TOARGB4444ROW_SSSE3)
   2459 void I422ToARGB4444Row_SSSE3(const uint8* src_y,
   2460                              const uint8* src_u,
   2461                              const uint8* src_v,
   2462                              uint8* dst_argb4444,
   2463                              const struct YuvConstants* yuvconstants,
   2464                              int width) {
   2465   // Row buffer for intermediate ARGB pixels.
   2466   SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
   2467   while (width > 0) {
   2468     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
   2469     I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth);
   2470     ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
   2471     src_y += twidth;
   2472     src_u += twidth / 2;
   2473     src_v += twidth / 2;
   2474     dst_argb4444 += twidth * 2;
   2475     width -= twidth;
   2476   }
   2477 }
   2478 #endif
   2479 
   2480 #if defined(HAS_NV12TORGB565ROW_SSSE3)
   2481 void NV12ToRGB565Row_SSSE3(const uint8* src_y,
   2482                            const uint8* src_uv,
   2483                            uint8* dst_rgb565,
   2484                            const struct YuvConstants* yuvconstants,
   2485                            int width) {
   2486   // Row buffer for intermediate ARGB pixels.
   2487   SIMD_ALIGNED(uint8 row[MAXTWIDTH * 4]);
   2488   while (width > 0) {
   2489     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
   2490     NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth);
   2491     ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
   2492     src_y += twidth;
   2493     src_uv += twidth;
   2494     dst_rgb565 += twidth * 2;
   2495     width -= twidth;
   2496   }
   2497 }
   2498 #endif
   2499 
   2500 #if defined(HAS_I422TORGB565ROW_AVX2)
   2501 void I422ToRGB565Row_AVX2(const uint8* src_y,
   2502                           const uint8* src_u,
   2503                           const uint8* src_v,
   2504                           uint8* dst_rgb565,
   2505                           const struct YuvConstants* yuvconstants,
   2506                           int width) {
   2507   SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
   2508   while (width > 0) {
   2509     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
   2510     I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
   2511 #if defined(HAS_ARGBTORGB565ROW_AVX2)
   2512     ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
   2513 #else
   2514     ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
   2515 #endif
   2516     src_y += twidth;
   2517     src_u += twidth / 2;
   2518     src_v += twidth / 2;
   2519     dst_rgb565 += twidth * 2;
   2520     width -= twidth;
   2521   }
   2522 }
   2523 #endif
   2524 
   2525 #if defined(HAS_I422TOARGB1555ROW_AVX2)
   2526 void I422ToARGB1555Row_AVX2(const uint8* src_y,
   2527                             const uint8* src_u,
   2528                             const uint8* src_v,
   2529                             uint8* dst_argb1555,
   2530                             const struct YuvConstants* yuvconstants,
   2531                             int width) {
   2532   // Row buffer for intermediate ARGB pixels.
   2533   SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
   2534   while (width > 0) {
   2535     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
   2536     I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
   2537 #if defined(HAS_ARGBTOARGB1555ROW_AVX2)
   2538     ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth);
   2539 #else
   2540     ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth);
   2541 #endif
   2542     src_y += twidth;
   2543     src_u += twidth / 2;
   2544     src_v += twidth / 2;
   2545     dst_argb1555 += twidth * 2;
   2546     width -= twidth;
   2547   }
   2548 }
   2549 #endif
   2550 
   2551 #if defined(HAS_I422TOARGB4444ROW_AVX2)
   2552 void I422ToARGB4444Row_AVX2(const uint8* src_y,
   2553                             const uint8* src_u,
   2554                             const uint8* src_v,
   2555                             uint8* dst_argb4444,
   2556                             const struct YuvConstants* yuvconstants,
   2557                             int width) {
   2558   // Row buffer for intermediate ARGB pixels.
   2559   SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
   2560   while (width > 0) {
   2561     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
   2562     I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
   2563 #if defined(HAS_ARGBTOARGB4444ROW_AVX2)
   2564     ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth);
   2565 #else
   2566     ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth);
   2567 #endif
   2568     src_y += twidth;
   2569     src_u += twidth / 2;
   2570     src_v += twidth / 2;
   2571     dst_argb4444 += twidth * 2;
   2572     width -= twidth;
   2573   }
   2574 }
   2575 #endif
   2576 
   2577 #if defined(HAS_I422TORGB24ROW_AVX2)
   2578 void I422ToRGB24Row_AVX2(const uint8* src_y,
   2579                             const uint8* src_u,
   2580                             const uint8* src_v,
   2581                             uint8* dst_rgb24,
   2582                             const struct YuvConstants* yuvconstants,
   2583                             int width) {
   2584   // Row buffer for intermediate ARGB pixels.
   2585   SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
   2586   while (width > 0) {
   2587     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
   2588     I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth);
   2589     // TODO(fbarchard): ARGBToRGB24Row_AVX2
   2590     ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth);
   2591     src_y += twidth;
   2592     src_u += twidth / 2;
   2593     src_v += twidth / 2;
   2594     dst_rgb24 += twidth * 3;
   2595     width -= twidth;
   2596   }
   2597 }
   2598 #endif
   2599 
   2600 #if defined(HAS_NV12TORGB565ROW_AVX2)
   2601 void NV12ToRGB565Row_AVX2(const uint8* src_y,
   2602                           const uint8* src_uv,
   2603                           uint8* dst_rgb565,
   2604                           const struct YuvConstants* yuvconstants,
   2605                           int width) {
   2606   // Row buffer for intermediate ARGB pixels.
   2607   SIMD_ALIGNED32(uint8 row[MAXTWIDTH * 4]);
   2608   while (width > 0) {
   2609     int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
   2610     NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth);
   2611 #if defined(HAS_ARGBTORGB565ROW_AVX2)
   2612     ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth);
   2613 #else
   2614     ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth);
   2615 #endif
   2616     src_y += twidth;
   2617     src_uv += twidth;
   2618     dst_rgb565 += twidth * 2;
   2619     width -= twidth;
   2620   }
   2621 }
   2622 #endif
   2623 
   2624 #ifdef __cplusplus
   2625 }  // extern "C"
   2626 }  // namespace libyuv
   2627 #endif
   2628