Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include "libyuv/convert.h"
     12 
     13 #include "conversion_tables.h"
     14 #include "libyuv/basic_types.h"
     15 #include "libyuv/cpu_id.h"
     16 #include "row.h"
     17 
     18 //#define SCALEOPT //Currently for windows only. June 2010
     19 
     20 #ifdef SCALEOPT
     21 #include <emmintrin.h>
     22 #endif
     23 
     24 namespace libyuv {
     25 
     26 static inline uint8 Clip(int32 val) {
     27   if (val < 0) {
     28     return (uint8) 0;
     29   } else if (val > 255){
     30     return (uint8) 255;
     31   }
     32   return (uint8) val;
     33 }
     34 
     35 int I420ToRGB24(const uint8* src_y, int src_stride_y,
     36                 const uint8* src_u, int src_stride_u,
     37                 const uint8* src_v, int src_stride_v,
     38                 uint8* dst_frame, int dst_stride_frame,
     39                 int width, int height) {
     40   if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
     41     return -1;
     42   }
     43 
     44   // RGB orientation - bottom up
     45   // TODO(fbarchard): support inversion
     46   uint8* out = dst_frame + dst_stride_frame * height - dst_stride_frame;
     47   uint8* out2 = out - dst_stride_frame;
     48   int h, w;
     49   int tmp_r, tmp_g, tmp_b;
     50   const uint8 *y1, *y2 ,*u, *v;
     51   y1 = src_y;
     52   y2 = y1 + src_stride_y;
     53   u = src_u;
     54   v = src_v;
     55   for (h = ((height + 1) >> 1); h > 0; h--){
     56     // 2 rows at a time, 2 y's at a time
     57     for (w = 0; w < ((width + 1) >> 1); w++){
     58       // Vertical and horizontal sub-sampling
     59       tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
     60       tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
     61       tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
     62       out[0] = Clip(tmp_b);
     63       out[1] = Clip(tmp_g);
     64       out[2] = Clip(tmp_r);
     65 
     66       tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
     67       tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
     68       tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
     69       out[3] = Clip(tmp_b);
     70       out[4] = Clip(tmp_g);
     71       out[5] = Clip(tmp_r);
     72 
     73       tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
     74       tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
     75       tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
     76       out2[0] = Clip(tmp_b);
     77       out2[1] = Clip(tmp_g);
     78       out2[2] = Clip(tmp_r);
     79 
     80       tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
     81       tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
     82       tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
     83       out2[3] = Clip(tmp_b);
     84       out2[4] = Clip(tmp_g);
     85       out2[5] = Clip(tmp_r);
     86 
     87       out += 6;
     88       out2 += 6;
     89       y1 += 2;
     90       y2 += 2;
     91       u++;
     92       v++;
     93     }
     94     y1 += src_stride_y + src_stride_y - width;
     95     y2 += src_stride_y + src_stride_y - width;
     96     u += src_stride_u - ((width + 1) >> 1);
     97     v += src_stride_v - ((width + 1) >> 1);
     98     out -= dst_stride_frame * 3;
     99     out2 -= dst_stride_frame * 3;
    100   } // end height for
    101   return 0;
    102 }
    103 
    104 // Little Endian...
    105 int I420ToARGB4444(const uint8* src_y, int src_stride_y,
    106                    const uint8* src_u, int src_stride_u,
    107                    const uint8* src_v, int src_stride_v,
    108                    uint8* dst_frame, int dst_stride_frame,
    109                    int width, int height) {
    110   if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    111     return -1;
    112   }
    113 
    114   // RGB orientation - bottom up
    115   uint8* out = dst_frame + dst_stride_frame * (height - 1);
    116   uint8* out2 = out - dst_stride_frame;
    117   int tmp_r, tmp_g, tmp_b;
    118   const uint8 *y1,*y2, *u, *v;
    119   y1 = src_y;
    120   y2 = y1 + src_stride_y;
    121   u = src_u;
    122   v = src_v;
    123   int h, w;
    124 
    125   for (h = ((height + 1) >> 1); h > 0; h--) {
    126     // 2 rows at a time, 2 y's at a time
    127     for (w = 0; w < ((width + 1) >> 1); w++) {
    128         // Vertical and horizontal sub-sampling
    129         // Convert to RGB888 and re-scale to 4 bits
    130         tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
    131         tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    132         tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
    133         out[0] =(uint8)((Clip(tmp_g) & 0xf0) + (Clip(tmp_b) >> 4));
    134         out[1] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
    135 
    136         tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
    137         tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    138         tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
    139         out[2] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
    140         out[3] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
    141 
    142         tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
    143         tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    144         tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
    145         out2[0] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
    146         out2[1] = (uint8) (0xf0 + (Clip(tmp_r) >> 4));
    147 
    148         tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
    149         tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    150         tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
    151         out2[2] = (uint8)((Clip(tmp_g) & 0xf0 ) + (Clip(tmp_b) >> 4));
    152         out2[3] = (uint8)(0xf0 + (Clip(tmp_r) >> 4));
    153 
    154         out += 4;
    155         out2 += 4;
    156         y1 += 2;
    157         y2 += 2;
    158         u++;
    159         v++;
    160     }
    161     y1 += 2 * src_stride_y - width;
    162     y2 += 2 * src_stride_y - width;
    163     u += src_stride_u - ((width + 1) >> 1);
    164     v += src_stride_v - ((width + 1) >> 1);
    165     out -= (dst_stride_frame + width) * 2;
    166     out2 -= (dst_stride_frame + width) * 2;
    167   } // end height for
    168   return 0;
    169 }
    170 
    171 
    172 int I420ToRGB565(const uint8* src_y, int src_stride_y,
    173                  const uint8* src_u, int src_stride_u,
    174                  const uint8* src_v, int src_stride_v,
    175                  uint8* dst_frame, int dst_stride_frame,
    176                  int width, int height) {
    177   if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    178     return -1;
    179   }
    180 
    181   // Negative height means invert the image.
    182   if (height < 0) {
    183     height = -height;
    184     src_y = src_y + (height - 1) * src_stride_y;
    185     src_u = src_u + (height - 1) * src_stride_u;
    186     src_v = src_v + (height - 1) * src_stride_v;
    187     src_stride_y = -src_stride_y;
    188     src_stride_u = -src_stride_u;
    189     src_stride_v = -src_stride_v;
    190   }
    191   uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
    192   uint16* out2 = out - dst_stride_frame;
    193 
    194   int tmp_r, tmp_g, tmp_b;
    195   const uint8* y1,* y2, * u, * v;
    196   y1 = src_y;
    197   y2 = y1 + src_stride_y;
    198   u = src_u;
    199   v = src_v;
    200   int h, w;
    201 
    202   for (h = ((height + 1) >> 1); h > 0; h--){
    203     // 2 rows at a time, 2 y's at a time
    204     for (w = 0; w < ((width + 1) >> 1); w++){
    205       // Vertical and horizontal sub-sampling
    206       // 1. Convert to RGB888
    207       // 2. Shift to adequate location (in the 16 bit word) - RGB 565
    208 
    209       tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
    210       tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    211       tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
    212       out[0]  = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
    213                           & 0xfc) << 3) + (Clip(tmp_b) >> 3);
    214 
    215       tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
    216       tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    217       tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
    218       out[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
    219                          & 0xfc) << 3) + (Clip(tmp_b ) >> 3);
    220 
    221       tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
    222       tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    223       tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
    224       out2[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
    225                           & 0xfc) << 3) + (Clip(tmp_b) >> 3);
    226 
    227       tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
    228       tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    229       tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
    230       out2[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
    231                           & 0xfc) << 3) + (Clip(tmp_b) >> 3);
    232 
    233       y1 += 2;
    234       y2 += 2;
    235       out += 2;
    236       out2 += 2;
    237       u++;
    238       v++;
    239     }
    240     y1 += 2 * src_stride_y - width;
    241     y2 += 2 * src_stride_y - width;
    242     u += src_stride_u - ((width + 1) >> 1);
    243     v += src_stride_v - ((width + 1) >> 1);
    244     out -= 2 * dst_stride_frame + width;
    245     out2 -=  2 * dst_stride_frame + width;
    246   }
    247   return 0;
    248 }
    249 
    250 
    251 int I420ToARGB1555(const uint8* src_y, int src_stride_y,
    252                    const uint8* src_u, int src_stride_u,
    253                    const uint8* src_v, int src_stride_v,
    254                    uint8* dst_frame, int dst_stride_frame,
    255                    int width, int height) {
    256   if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    257     return -1;
    258   }
    259   uint16* out = (uint16*)(dst_frame) + dst_stride_frame * (height - 1);
    260   uint16* out2 = out - dst_stride_frame ;
    261   int32 tmp_r, tmp_g, tmp_b;
    262   const uint8 *y1,*y2, *u, *v;
    263   int h, w;
    264 
    265   y1 = src_y;
    266   y2 = y1 + src_stride_y;
    267   u = src_u;
    268   v = src_v;
    269 
    270   for (h = ((height + 1) >> 1); h > 0; h--){
    271     // 2 rows at a time, 2 y's at a time
    272     for (w = 0; w < ((width + 1) >> 1); w++){
    273       // Vertical and horizontal sub-sampling
    274       // 1. Convert to RGB888
    275       // 2. Shift to adequate location (in the 16 bit word) - RGB 555
    276       // 3. Add 1 for alpha value
    277       tmp_r = (int32)((mapYc[y1[0]] + mapVcr[v[0]] + 128) >> 8);
    278       tmp_g = (int32)((mapYc[y1[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    279       tmp_b = (int32)((mapYc[y1[0]] + mapUcb[u[0]] + 128) >> 8);
    280       out[0]  = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
    281                 ((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
    282 
    283       tmp_r = (int32)((mapYc[y1[1]] + mapVcr[v[0]] + 128) >> 8);
    284       tmp_g = (int32)((mapYc[y1[1]] + mapUcg[u[0]] + mapVcg[v[0]]  + 128) >> 8);
    285       tmp_b = (int32)((mapYc[y1[1]] + mapUcb[u[0]] + 128) >> 8);
    286       out[1]  = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
    287                 ((Clip(tmp_g) & 0xf8) << 3)  + (Clip(tmp_b) >> 3));
    288 
    289       tmp_r = (int32)((mapYc[y2[0]] + mapVcr[v[0]] + 128) >> 8);
    290       tmp_g = (int32)((mapYc[y2[0]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    291       tmp_b = (int32)((mapYc[y2[0]] + mapUcb[u[0]] + 128) >> 8);
    292       out2[0]  = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
    293                  ((Clip(tmp_g) & 0xf8) << 3) + (Clip(tmp_b) >> 3));
    294 
    295       tmp_r = (int32)((mapYc[y2[1]] + mapVcr[v[0]] + 128) >> 8);
    296       tmp_g = (int32)((mapYc[y2[1]] + mapUcg[u[0]] + mapVcg[v[0]] + 128) >> 8);
    297       tmp_b = (int32)((mapYc[y2[1]] + mapUcb[u[0]] + 128) >> 8);
    298       out2[1]  = (uint16)(0x8000 + ((Clip(tmp_r) & 0xf8) << 10) +
    299                  ((Clip(tmp_g) & 0xf8) << 3)  + (Clip(tmp_b) >> 3));
    300 
    301       y1 += 2;
    302       y2 += 2;
    303       out += 2;
    304       out2 += 2;
    305       u++;
    306       v++;
    307     }
    308     y1 += 2 * src_stride_y - width;
    309     y2 += 2 * src_stride_y - width;
    310     u += src_stride_u - ((width + 1) >> 1);
    311     v += src_stride_v - ((width + 1) >> 1);
    312     out -= 2 * dst_stride_frame + width;
    313     out2 -=  2 * dst_stride_frame + width;
    314   }
    315   return 0;
    316 }
    317 
    318 
    319 int I420ToYUY2(const uint8* src_y, int src_stride_y,
    320                const uint8* src_u, int src_stride_u,
    321                const uint8* src_v, int src_stride_v,
    322                uint8* dst_frame, int dst_stride_frame,
    323                int width, int height) {
    324   if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    325     return -1;
    326   }
    327 
    328   const uint8* in1 = src_y;
    329   const uint8* in2 = src_y + src_stride_y;
    330 
    331   uint8* out1 = dst_frame;
    332   uint8* out2 = dst_frame + dst_stride_frame;
    333 
    334   // YUY2 - Macro-pixel = 2 image pixels
    335   // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4....
    336 #ifndef SCALEOPT
    337   for (int i = 0; i < ((height + 1) >> 1); i++){
    338     for (int j = 0; j < ((width + 1) >> 1); j++){
    339       out1[0] = in1[0];
    340       out1[1] = *src_u;
    341       out1[2] = in1[1];
    342       out1[3] = *src_v;
    343 
    344       out2[0] = in2[0];
    345       out2[1] = *src_u;
    346       out2[2] = in2[1];
    347       out2[3] = *src_v;
    348       out1 += 4;
    349       out2 += 4;
    350       src_u++;
    351       src_v++;
    352       in1 += 2;
    353       in2 += 2;
    354     }
    355     in1 += 2 * src_stride_y - width;
    356     in2 += 2 * src_stride_y - width;
    357     src_u += src_stride_u - ((width + 1) >> 1);
    358     src_v += src_stride_v - ((width + 1) >> 1);
    359     out1 += dst_stride_frame + dst_stride_frame - 2 * width;
    360     out2 += dst_stride_frame + dst_stride_frame - 2 * width;
    361   }
    362 #else
    363   for (WebRtc_UWord32 i = 0; i < ((height + 1) >> 1);i++) {
    364     int32 width__ = (width >> 4);
    365     _asm
    366     {
    367       ;pusha
    368       mov       eax, DWORD PTR [in1]                       ;1939.33
    369       mov       ecx, DWORD PTR [in2]                       ;1939.33
    370       mov       ebx, DWORD PTR [src_u]                       ;1939.33
    371       mov       edx, DWORD PTR [src_v]                       ;1939.33
    372       loop0:
    373       movq      xmm6, QWORD PTR [ebx]          ;src_u
    374       movq      xmm0, QWORD PTR [edx]          ;src_v
    375       punpcklbw xmm6, xmm0                     ;src_u, src_v mix
    376       ;movdqa    xmm1, xmm6
    377       ;movdqa    xmm2, xmm6
    378       ;movdqa    xmm4, xmm6
    379 
    380       movdqu    xmm3, XMMWORD PTR [eax]        ;in1
    381       movdqa    xmm1, xmm3
    382       punpcklbw xmm1, xmm6                     ;in1, src_u, in1, src_v
    383       mov       esi, DWORD PTR [out1]
    384       movdqu    XMMWORD PTR [esi], xmm1        ;write to out1
    385 
    386       movdqu    xmm5, XMMWORD PTR [ecx]        ;in2
    387       movdqa    xmm2, xmm5
    388       punpcklbw xmm2, xmm6                     ;in2, src_u, in2, src_v
    389       mov       edi, DWORD PTR [out2]
    390       movdqu    XMMWORD PTR [edi], xmm2        ;write to out2
    391 
    392       punpckhbw xmm3, xmm6                     ;in1, src_u, in1, src_v again
    393       movdqu    XMMWORD PTR [esi+16], xmm3     ;write to out1 again
    394       add       esi, 32
    395       mov       DWORD PTR [out1], esi
    396 
    397       punpckhbw xmm5, xmm6                     ;src_u, in2, src_v again
    398       movdqu    XMMWORD PTR [edi+16], xmm5     ;write to out2 again
    399       add       edi, 32
    400       mov       DWORD PTR [out2], edi
    401 
    402       add       ebx, 8
    403       add       edx, 8
    404       add       eax, 16
    405       add       ecx, 16
    406 
    407       mov       esi, DWORD PTR [width__]
    408       sub       esi, 1
    409       mov       DWORD PTR [width__], esi
    410       jg        loop0
    411 
    412       mov       DWORD PTR [in1], eax                       ;1939.33
    413       mov       DWORD PTR [in2], ecx                       ;1939.33
    414       mov       DWORD PTR [src_u], ebx                       ;1939.33
    415       mov       DWORD PTR [src_v], edx                       ;1939.33
    416 
    417       ;popa
    418       emms
    419     }
    420     in1 += 2 * src_stride_y - width;
    421     in2 += 2 * src_stride_y - width;
    422     out1 += dst_stride_frame + dst_stride_frame - 2 * width;
    423     out2 += dst_stride_frame + dst_stride_frame - 2 * width;
    424   }
    425 #endif
    426   return 0;
    427 }
    428 
    429 int I420ToUYVY(const uint8* src_y, int src_stride_y,
    430                const uint8* src_u, int src_stride_u,
    431                const uint8* src_v, int src_stride_v,
    432                uint8* dst_frame, int dst_stride_frame,
    433                int width, int height) {
    434   if (src_y == NULL || src_u == NULL || src_v == NULL || dst_frame == NULL) {
    435     return -1;
    436   }
    437 
    438   int i = 0;
    439   const uint8* y1 = src_y;
    440   const uint8* y2 = y1 + src_stride_y;
    441   const uint8* u = src_u;
    442   const uint8* v = src_v;
    443 
    444   uint8* out1 = dst_frame;
    445   uint8* out2 = dst_frame + dst_stride_frame;
    446 
    447   // Macro-pixel = 2 image pixels
    448   // U0Y0V0Y1....U2Y2V2Y3...U4Y4V4Y5.....
    449 
    450 #ifndef SCALEOPT
    451   for (; i < ((height + 1) >> 1); i++) {
    452     for (int j = 0; j < ((width + 1) >> 1); j++) {
    453       out1[0] = *u;
    454       out1[1] = y1[0];
    455       out1[2] = *v;
    456       out1[3] = y1[1];
    457 
    458       out2[0] = *u;
    459       out2[1] = y2[0];
    460       out2[2] = *v;
    461       out2[3] = y2[1];
    462       out1 += 4;
    463       out2 += 4;
    464       u++;
    465       v++;
    466       y1 += 2;
    467       y2 += 2;
    468     }
    469     y1 += 2 * src_stride_y - width;
    470     y2 += 2 * src_stride_y - width;
    471     u += src_stride_u - ((width + 1) >> 1);
    472     v += src_stride_v - ((width + 1) >> 1);
    473     out1 += 2 * (dst_stride_frame - width);
    474     out2 += 2 * (dst_stride_frame - width);
    475   }
    476 #else
    477   for (; i < (height >> 1);i++) {
    478     int32 width__ = (width >> 4);
    479     _asm
    480     {
    481       ;pusha
    482       mov       eax, DWORD PTR [in1]                       ;1939.33
    483       mov       ecx, DWORD PTR [in2]                       ;1939.33
    484       mov       ebx, DWORD PTR [src_u]                       ;1939.33
    485       mov       edx, DWORD PTR [src_v]                       ;1939.33
    486 loop0:
    487       movq      xmm6, QWORD PTR [ebx]          ;src_u
    488       movq      xmm0, QWORD PTR [edx]          ;src_v
    489       punpcklbw xmm6, xmm0                     ;src_u, src_v mix
    490       movdqa    xmm1, xmm6
    491       movdqa    xmm2, xmm6
    492       movdqa    xmm4, xmm6
    493 
    494       movdqu    xmm3, XMMWORD PTR [eax]        ;in1
    495       punpcklbw xmm1, xmm3                     ;src_u, in1, src_v
    496       mov       esi, DWORD PTR [out1]
    497       movdqu    XMMWORD PTR [esi], xmm1        ;write to out1
    498 
    499       movdqu    xmm5, XMMWORD PTR [ecx]        ;in2
    500       punpcklbw xmm2, xmm5                     ;src_u, in2, src_v
    501       mov       edi, DWORD PTR [out2]
    502       movdqu    XMMWORD PTR [edi], xmm2        ;write to out2
    503 
    504       punpckhbw xmm4, xmm3                     ;src_u, in1, src_v again
    505       movdqu    XMMWORD PTR [esi+16], xmm4     ;write to out1 again
    506       add       esi, 32
    507       mov       DWORD PTR [out1], esi
    508 
    509       punpckhbw xmm6, xmm5                     ;src_u, in2, src_v again
    510       movdqu    XMMWORD PTR [edi+16], xmm6     ;write to out2 again
    511       add       edi, 32
    512       mov       DWORD PTR [out2], edi
    513 
    514       add       ebx, 8
    515       add       edx, 8
    516       add       eax, 16
    517       add       ecx, 16
    518 
    519       mov       esi, DWORD PTR [width__]
    520       sub       esi, 1
    521       mov       DWORD PTR [width__], esi
    522       jg        loop0
    523 
    524       mov       DWORD PTR [in1], eax                       ;1939.33
    525       mov       DWORD PTR [in2], ecx                       ;1939.33
    526       mov       DWORD PTR [src_u], ebx                       ;1939.33
    527       mov       DWORD PTR [src_v], edx                       ;1939.33
    528 
    529       ;popa
    530       emms
    531     }
    532     in1 += width;
    533     in2 += width;
    534     out1 += 2 * (dst_stride_frame - width);
    535     out2 += 2 * (dst_stride_frame - width);
    536   }
    537 #endif
    538   return 0;
    539 }
    540 
    541 
    542 int NV12ToRGB565(const uint8* src_y, int src_stride_y,
    543                  const uint8* src_uv, int src_stride_uv,
    544                  uint8* dst_frame, int dst_stride_frame,
    545                  int width, int height) {
    546   if (src_y == NULL || src_uv == NULL || dst_frame == NULL) {
    547     return -1;
    548   }
    549 
    550   // Bi-Planar: Y plane followed by an interlaced U and V plane
    551   const uint8* interlacedSrc = src_uv;
    552   uint16* out = (uint16*)(src_y) + dst_stride_frame * (height - 1);
    553   uint16* out2 = out - dst_stride_frame;
    554   int32 tmp_r, tmp_g, tmp_b;
    555   const uint8 *y1,*y2;
    556   y1 = src_y;
    557   y2 = y1 + src_stride_y;
    558   int h, w;
    559 
    560   for (h = ((height + 1) >> 1); h > 0; h--) {
    561     // 2 rows at a time, 2 y's at a time
    562     for (w = 0; w < ((width + 1) >> 1); w++) {
    563       // Vertical and horizontal sub-sampling
    564       // 1. Convert to RGB888
    565       // 2. Shift to adequate location (in the 16 bit word) - RGB 565
    566 
    567       tmp_r = (int32)((mapYc[y1[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8);
    568       tmp_g = (int32)((mapYc[y1[0]] + mapUcg[interlacedSrc[0]]
    569                       + mapVcg[interlacedSrc[1]] + 128) >> 8);
    570       tmp_b = (int32)((mapYc[y1[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
    571       out[0]  = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
    572                           & 0xfc) << 3) + (Clip(tmp_b) >> 3);
    573 
    574       tmp_r = (int32)((mapYc[y1[1]] + mapVcr[interlacedSrc[1]] + 128) >> 8);
    575       tmp_g = (int32)((mapYc[y1[1]] + mapUcg[interlacedSrc[0]]
    576                       + mapVcg[interlacedSrc[1]] + 128) >> 8);
    577       tmp_b = (int32)((mapYc[y1[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
    578       out[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
    579                          & 0xfc) << 3) + (Clip(tmp_b ) >> 3);
    580 
    581       tmp_r = (int32)((mapYc[y2[0]] + mapVcr[interlacedSrc[1]] + 128) >> 8);
    582       tmp_g = (int32)((mapYc[y2[0]] + mapUcg[interlacedSrc[0]]
    583                       + mapVcg[interlacedSrc[1]] + 128) >> 8);
    584       tmp_b = (int32)((mapYc[y2[0]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
    585       out2[0] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
    586                           & 0xfc) << 3) + (Clip(tmp_b) >> 3);
    587 
    588       tmp_r = (int32)((mapYc[y2[1]] + mapVcr[interlacedSrc[1]]
    589                       + 128) >> 8);
    590       tmp_g = (int32)((mapYc[y2[1]] + mapUcg[interlacedSrc[0]]
    591                       + mapVcg[interlacedSrc[1]] + 128) >> 8);
    592       tmp_b = (int32)((mapYc[y2[1]] + mapUcb[interlacedSrc[0]] + 128) >> 8);
    593       out2[1] = (uint16)((Clip(tmp_r) & 0xf8) << 8) + ((Clip(tmp_g)
    594                           & 0xfc) << 3) + (Clip(tmp_b) >> 3);
    595 
    596       y1 += 2;
    597       y2 += 2;
    598       out += 2;
    599       out2 += 2;
    600       interlacedSrc += 2;
    601     }
    602     y1 += 2 * src_stride_y - width;
    603     y2 += 2 * src_stride_y - width;
    604     interlacedSrc += src_stride_uv - ((width + 1) >> 1);
    605     out -= 3 * dst_stride_frame + dst_stride_frame - width;
    606     out2 -= 3 * dst_stride_frame + dst_stride_frame - width;
    607   }
    608   return 0;
    609 }
    610 
    611 // TODO(fbarchard): Deprecated - this is same as BG24ToARGB with -height
    612 int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
    613                 uint8* dst_frame, int dst_stride_frame,
    614                 int width, int height) {
    615   if (src_frame == NULL || dst_frame == NULL) {
    616     return -1;
    617   }
    618 
    619   int i, j, offset;
    620   uint8* outFrame = dst_frame;
    621   const uint8* inFrame = src_frame;
    622 
    623   outFrame += dst_stride_frame * (height - 1) * 4;
    624   for (i = 0; i < height; i++) {
    625     for (j = 0; j < width; j++) {
    626       offset = j * 4;
    627       outFrame[0 + offset] = inFrame[0];
    628       outFrame[1 + offset] = inFrame[1];
    629       outFrame[2 + offset] = inFrame[2];
    630       outFrame[3 + offset] = 0xff;
    631       inFrame += 3;
    632     }
    633     outFrame -= 4 * (dst_stride_frame - width);
    634     inFrame += src_stride_frame - width;
    635   }
    636   return 0;
    637 }
    638 
    639 int ARGBToI420(const uint8* src_frame, int src_stride_frame,
    640                uint8* dst_y, int dst_stride_y,
    641                uint8* dst_u, int dst_stride_u,
    642                uint8* dst_v, int dst_stride_v,
    643                int width, int height) {
    644   if (height < 0) {
    645     height = -height;
    646     src_frame = src_frame + (height - 1) * src_stride_frame;
    647     src_stride_frame = -src_stride_frame;
    648   }
    649   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
    650   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
    651                       uint8* dst_u, uint8* dst_v, int width);
    652 #if defined(HAS_ARGBTOYROW_SSSE3)
    653   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    654       (width % 16 == 0) &&
    655       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    656       IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
    657     ARGBToYRow = ARGBToYRow_SSSE3;
    658   } else
    659 #endif
    660   {
    661     ARGBToYRow = ARGBToYRow_C;
    662   }
    663 #if defined(HAS_ARGBTOUVROW_SSSE3)
    664   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    665       (width % 16 == 0) &&
    666       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    667       IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
    668       IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
    669     ARGBToUVRow = ARGBToUVRow_SSSE3;
    670   } else
    671 #endif
    672   {
    673     ARGBToUVRow = ARGBToUVRow_C;
    674   }
    675 
    676   for (int y = 0; y < (height - 1); y += 2) {
    677     ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
    678     ARGBToYRow(src_frame, dst_y, width);
    679     ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
    680     src_frame += src_stride_frame * 2;
    681     dst_y += dst_stride_y * 2;
    682     dst_u += dst_stride_u;
    683     dst_v += dst_stride_v;
    684   }
    685   if (height & 1) {
    686     ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
    687     ARGBToYRow(src_frame, dst_y, width);
    688   }
    689   return 0;
    690 }
    691 
    692 int BGRAToI420(const uint8* src_frame, int src_stride_frame,
    693                uint8* dst_y, int dst_stride_y,
    694                uint8* dst_u, int dst_stride_u,
    695                uint8* dst_v, int dst_stride_v,
    696                int width, int height) {
    697   if (height < 0) {
    698     height = -height;
    699     src_frame = src_frame + (height - 1) * src_stride_frame;
    700     src_stride_frame = -src_stride_frame;
    701   }
    702   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
    703   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
    704                       uint8* dst_u, uint8* dst_v, int width);
    705 #if defined(HAS_BGRATOYROW_SSSE3)
    706   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    707       (width % 16 == 0) &&
    708       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    709       IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
    710     ARGBToYRow = BGRAToYRow_SSSE3;
    711   } else
    712 #endif
    713   {
    714     ARGBToYRow = BGRAToYRow_C;
    715   }
    716 #if defined(HAS_BGRATOUVROW_SSSE3)
    717   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    718       (width % 16 == 0) &&
    719       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    720       IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
    721       IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
    722     ARGBToUVRow = BGRAToUVRow_SSSE3;
    723   } else
    724 #endif
    725   {
    726     ARGBToUVRow = BGRAToUVRow_C;
    727   }
    728 
    729   for (int y = 0; y < (height - 1); y += 2) {
    730     ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
    731     ARGBToYRow(src_frame, dst_y, width);
    732     ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
    733     src_frame += src_stride_frame * 2;
    734     dst_y += dst_stride_y * 2;
    735     dst_u += dst_stride_u;
    736     dst_v += dst_stride_v;
    737   }
    738   if (height & 1) {
    739     ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
    740     ARGBToYRow(src_frame, dst_y, width);
    741   }
    742   return 0;
    743 }
    744 
    745 int ABGRToI420(const uint8* src_frame, int src_stride_frame,
    746                uint8* dst_y, int dst_stride_y,
    747                uint8* dst_u, int dst_stride_u,
    748                uint8* dst_v, int dst_stride_v,
    749                int width, int height) {
    750   if (height < 0) {
    751     height = -height;
    752     src_frame = src_frame + (height - 1) * src_stride_frame;
    753     src_stride_frame = -src_stride_frame;
    754   }
    755   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
    756   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
    757                       uint8* dst_u, uint8* dst_v, int width);
    758 #if defined(HAS_ABGRTOYROW_SSSE3)
    759   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    760       (width % 16 == 0) &&
    761       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    762       IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
    763     ARGBToYRow = ABGRToYRow_SSSE3;
    764   } else
    765 #endif
    766   {
    767     ARGBToYRow = ABGRToYRow_C;
    768   }
    769 #if defined(HAS_ABGRTOUVROW_SSSE3)
    770   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    771       (width % 16 == 0) &&
    772       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    773       IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
    774       IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
    775     ARGBToUVRow = ABGRToUVRow_SSSE3;
    776   } else
    777 #endif
    778   {
    779     ARGBToUVRow = ABGRToUVRow_C;
    780   }
    781 
    782   for (int y = 0; y < (height - 1); y += 2) {
    783     ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
    784     ARGBToYRow(src_frame, dst_y, width);
    785     ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
    786     src_frame += src_stride_frame * 2;
    787     dst_y += dst_stride_y * 2;
    788     dst_u += dst_stride_u;
    789     dst_v += dst_stride_v;
    790   }
    791   if (height & 1) {
    792     ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
    793     ARGBToYRow(src_frame, dst_y, width);
    794   }
    795   return 0;
    796 }
    797 
    798 int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
    799                 uint8* dst_y, int dst_stride_y,
    800                 uint8* dst_u, int dst_stride_u,
    801                 uint8* dst_v, int dst_stride_v,
    802                 int width, int height) {
    803   if (height < 0) {
    804     height = -height;
    805     src_frame = src_frame + (height - 1) * src_stride_frame;
    806     src_stride_frame = -src_stride_frame;
    807   }
    808   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
    809   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
    810                       uint8* dst_u, uint8* dst_v, int width);
    811 #if defined(HAS_RGB24TOYROW_SSSE3)
    812   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    813       (width % 16 == 0) &&
    814       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    815       IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
    816     ARGBToYRow = RGB24ToYRow_SSSE3;
    817   } else
    818 #endif
    819   {
    820     ARGBToYRow = RGB24ToYRow_C;
    821   }
    822 #if defined(HAS_RGB24TOUVROW_SSSE3)
    823   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    824       (width % 16 == 0) &&
    825       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    826       IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
    827       IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
    828     ARGBToUVRow = RGB24ToUVRow_SSSE3;
    829   } else
    830 #endif
    831   {
    832     ARGBToUVRow = RGB24ToUVRow_C;
    833   }
    834 
    835   for (int y = 0; y < (height - 1); y += 2) {
    836     ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
    837     ARGBToYRow(src_frame, dst_y, width);
    838     ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
    839     src_frame += src_stride_frame * 2;
    840     dst_y += dst_stride_y * 2;
    841     dst_u += dst_stride_u;
    842     dst_v += dst_stride_v;
    843   }
    844   if (height & 1) {
    845     ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
    846     ARGBToYRow(src_frame, dst_y, width);
    847   }
    848   return 0;
    849 }
    850 
    851 int RAWToI420(const uint8* src_frame, int src_stride_frame,
    852                 uint8* dst_y, int dst_stride_y,
    853                 uint8* dst_u, int dst_stride_u,
    854                 uint8* dst_v, int dst_stride_v,
    855                 int width, int height) {
    856   if (height < 0) {
    857     height = -height;
    858     src_frame = src_frame + (height - 1) * src_stride_frame;
    859     src_stride_frame = -src_stride_frame;
    860   }
    861   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
    862   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
    863                       uint8* dst_u, uint8* dst_v, int width);
    864 #if defined(HAS_RAWTOYROW_SSSE3)
    865   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    866       (width % 16 == 0) &&
    867       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    868       IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
    869     ARGBToYRow = RAWToYRow_SSSE3;
    870   } else
    871 #endif
    872   {
    873     ARGBToYRow = RAWToYRow_C;
    874   }
    875 #if defined(HAS_RAWTOUVROW_SSSE3)
    876   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    877       (width % 16 == 0) &&
    878       IS_ALIGNED(src_frame, 16) && (src_stride_frame % 16 == 0) &&
    879       IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
    880       IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
    881     ARGBToUVRow = RAWToUVRow_SSSE3;
    882   } else
    883 #endif
    884   {
    885     ARGBToUVRow = RAWToUVRow_C;
    886   }
    887 
    888   for (int y = 0; y < (height - 1); y += 2) {
    889     ARGBToUVRow(src_frame, src_stride_frame, dst_u, dst_v, width);
    890     ARGBToYRow(src_frame, dst_y, width);
    891     ARGBToYRow(src_frame + src_stride_frame, dst_y + dst_stride_y, width);
    892     src_frame += src_stride_frame * 2;
    893     dst_y += dst_stride_y * 2;
    894     dst_u += dst_stride_u;
    895     dst_v += dst_stride_v;
    896   }
    897   if (height & 1) {
    898     ARGBToUVRow(src_frame, 0, dst_u, dst_v, width);
    899     ARGBToYRow(src_frame, dst_y, width);
    900   }
    901   return 0;
    902 }
    903 
    904 } // namespace libyuv
    905