Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright (c) 2011 The LibYuv project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <assert.h>
     12 
     13 #include "libyuv/cpu_id.h"
     14 #include "video_common.h"
     15 #include "row.h"
     16 
// Size in bytes of one temporary ARGB row used by BayerRGBToI420, which
// rejects any width whose ARGB row (width * 4 bytes) would exceed it.
#define kMaxStride (2048 * 4)
     18 
     19 namespace libyuv {
     20 
     21 // Note: to do this with Neon vld4.8 would load ARGB values into 4 registers
     22 // and vst would select which 2 components to write.  The low level would need
     23 // to be ARGBToBG, ARGBToGB, ARGBToRG, ARGBToGR
     24 
     25 #if defined(WIN32) && !defined(COVERAGE_ENABLED)
     26 #define HAS_ARGBTOBAYERROW_SSSE3
// Converts a row of ARGB pixels to Bayer samples, 4 pixels per iteration.
// Each iteration loads 16 source bytes with an aligned load, shuffles them
// with the selector (replicated into every dword of xmm7) and stores the low
// 4 bytes of the result. The loop body always runs at least once and consumes
// pix in steps of 4, so the caller only selects this function when width is a
// multiple of 4 and src_argb is 16 byte aligned.
// The function is naked: arguments are read directly from the stack relative
// to esp, and no prologue or epilogue is generated.
__declspec(naked)
static void ARGBToBayerRow_SSSE3(const uint8* src_argb,
                                 uint8* dst_bayer, uint32 selector, int pix) {
  __asm {
    mov        eax, [esp + 4]    // src_argb
    mov        edx, [esp + 8]    // dst_bayer
    movd       xmm7, [esp + 12]  // selector
    mov        ecx, [esp + 16]   // pix
    pshufd     xmm7, xmm7, 0     // broadcast selector to all four dwords

  wloop:
    movdqa     xmm0, [eax]       // load 4 ARGB pixels (aligned)
    lea        eax, [eax + 16]
    pshufb     xmm0, xmm7        // gather the selected channel of each pixel
    movd       [edx], xmm0       // store 4 Bayer samples
    lea        edx, [edx + 4]
    sub        ecx, 4
    ja         wloop             // loop while pixels remain
    ret
  }
}
     48 
     49 #elif (defined(__x86_64__) || defined(__i386__)) && \
     50     !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
     51 
     52 #define HAS_ARGBTOBAYERROW_SSSE3
     53 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer,
     54                                  uint32 selector, int pix) {
     55   asm volatile(
     56     "movd   %3,%%xmm7\n"
     57     "pshufd $0x0,%%xmm7,%%xmm7\n"
     58 "1:"
     59     "movdqa (%0),%%xmm0\n"
     60     "lea    0x10(%0),%0\n"
     61     "pshufb %%xmm7,%%xmm0\n"
     62     "movd   %%xmm0,(%1)\n"
     63     "lea    0x4(%1),%1\n"
     64     "sub    $0x4,%2\n"
     65     "ja     1b\n"
     66   : "+r"(src_argb),  // %0
     67     "+r"(dst_bayer), // %1
     68     "+r"(pix)        // %2
     69   : "r"(selector)    // %3
     70   : "memory"
     71 );
     72 }
     73 #endif
     74 
     75 static void ARGBToBayerRow_C(const uint8* src_argb,
     76                              uint8* dst_bayer, uint32 selector, int pix) {
     77   int index0 = selector & 0xff;
     78   int index1 = (selector >> 8) & 0xff;
     79   // Copy a row of Bayer.
     80   for (int x = 0; x < (pix - 1); x += 2) {
     81     dst_bayer[0] = src_argb[index0];
     82     dst_bayer[1] = src_argb[index1];
     83     src_argb += 8;
     84     dst_bayer += 2;
     85   }
     86   if (pix & 1) {
     87     dst_bayer[0] = src_argb[index0];
     88   }
     89 }
     90 
     91 // generate a selector mask useful for pshufb
     92 static uint32 GenerateSelector(int select0, int select1) {
     93   return static_cast<uint32>(select0) |
     94          static_cast<uint32>((select1 + 4) << 8) |
     95          static_cast<uint32>((select0 + 8) << 16) |
     96          static_cast<uint32>((select1 + 12) << 24);
     97 }
     98 
     99 // Converts 32 bit ARGB to any Bayer RGB format.
    100 int ARGBToBayerRGB(const uint8* src_rgb, int src_stride_rgb,
    101                    uint8* dst_bayer, int dst_stride_bayer,
    102                    uint32 dst_fourcc_bayer,
    103                    int width, int height) {
    104   if (height < 0) {
    105     height = -height;
    106     src_rgb = src_rgb + (height - 1) * src_stride_rgb;
    107     src_stride_rgb = -src_stride_rgb;
    108   }
    109   void (*ARGBToBayerRow)(const uint8* src_argb,
    110                          uint8* dst_bayer, uint32 selector, int pix);
    111 #if defined(HAS_ARGBTOBAYERROW_SSSE3)
    112   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    113       (width % 4 == 0) &&
    114       IS_ALIGNED(src_rgb, 16) && (src_stride_rgb % 16 == 0) &&
    115       IS_ALIGNED(dst_bayer, 4) && (dst_stride_bayer % 4 == 0)) {
    116     ARGBToBayerRow = ARGBToBayerRow_SSSE3;
    117   } else
    118 #endif
    119   {
    120     ARGBToBayerRow = ARGBToBayerRow_C;
    121   }
    122 
    123   int blue_index = 0;
    124   int green_index = 1;
    125   int red_index = 2;
    126 
    127   // Now build a lookup table containing the indices for the four pixels in each
    128   // 2x2 Bayer grid.
    129   uint32 index_map[2];
    130   switch (dst_fourcc_bayer) {
    131     default:
    132       assert(false);
    133     case FOURCC_RGGB:
    134       index_map[0] = GenerateSelector(red_index, green_index);
    135       index_map[1] = GenerateSelector(green_index, blue_index);
    136       break;
    137     case FOURCC_BGGR:
    138       index_map[0] = GenerateSelector(blue_index, green_index);
    139       index_map[1] = GenerateSelector(green_index, red_index);
    140       break;
    141     case FOURCC_GRBG:
    142       index_map[0] = GenerateSelector(green_index, red_index);
    143       index_map[1] = GenerateSelector(blue_index, green_index);
    144       break;
    145     case FOURCC_GBRG:
    146       index_map[0] = GenerateSelector(green_index, blue_index);
    147       index_map[1] = GenerateSelector(red_index, green_index);
    148       break;
    149   }
    150 
    151   // Now convert.
    152   for (int y = 0; y < height; ++y) {
    153     ARGBToBayerRow(src_rgb, dst_bayer, index_map[y & 1], width);
    154     src_rgb += src_stride_rgb;
    155     dst_bayer += dst_stride_bayer;
    156   }
    157   return 0;
    158 }
    159 
// Average of two values, rounded down (sum shifted right by one).
#define AVG(a,b) (((a) + (b)) >> 1)
    161 
    162 static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer,
    163                        uint8* dst_rgb, int pix) {
    164   const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
    165   uint8 g = src_bayer0[1];
    166   uint8 r = src_bayer1[1];
    167   for (int x = 0; x < (pix - 2); x += 2) {
    168     dst_rgb[0] = src_bayer0[0];
    169     dst_rgb[1] = AVG(g, src_bayer0[1]);
    170     dst_rgb[2] = AVG(r, src_bayer1[1]);
    171     dst_rgb[3] = 255U;
    172     dst_rgb[4] = AVG(src_bayer0[0], src_bayer0[2]);
    173     dst_rgb[5] = src_bayer0[1];
    174     dst_rgb[6] = src_bayer1[1];
    175     dst_rgb[7] = 255U;
    176     g = src_bayer0[1];
    177     r = src_bayer1[1];
    178     src_bayer0 += 2;
    179     src_bayer1 += 2;
    180     dst_rgb += 8;
    181   }
    182   dst_rgb[0] = src_bayer0[0];
    183   dst_rgb[1] = AVG(g, src_bayer0[1]);
    184   dst_rgb[2] = AVG(r, src_bayer1[1]);
    185   dst_rgb[3] = 255U;
    186   dst_rgb[4] = src_bayer0[0];
    187   dst_rgb[5] = src_bayer0[1];
    188   dst_rgb[6] = src_bayer1[1];
    189   dst_rgb[7] = 255U;
    190 }
    191 
    192 static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer,
    193                        uint8* dst_rgb, int pix) {
    194   const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
    195   uint8 g = src_bayer0[1];
    196   uint8 b = src_bayer1[1];
    197   for (int x = 0; x < (pix - 2); x += 2) {
    198     dst_rgb[0] = AVG(b, src_bayer1[1]);
    199     dst_rgb[1] = AVG(g, src_bayer0[1]);
    200     dst_rgb[2] = src_bayer0[0];
    201     dst_rgb[3] = 255U;
    202     dst_rgb[4] = src_bayer1[1];
    203     dst_rgb[5] = src_bayer0[1];
    204     dst_rgb[6] = AVG(src_bayer0[0], src_bayer0[2]);
    205     dst_rgb[7] = 255U;
    206     g = src_bayer0[1];
    207     b = src_bayer1[1];
    208     src_bayer0 += 2;
    209     src_bayer1 += 2;
    210     dst_rgb += 8;
    211   }
    212   dst_rgb[0] = AVG(b, src_bayer1[1]);
    213   dst_rgb[1] = AVG(g, src_bayer0[1]);
    214   dst_rgb[2] = src_bayer0[0];
    215   dst_rgb[3] = 255U;
    216   dst_rgb[4] = src_bayer1[1];
    217   dst_rgb[5] = src_bayer0[1];
    218   dst_rgb[6] = src_bayer0[0];
    219   dst_rgb[7] = 255U;
    220 }
    221 
    222 static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer,
    223                        uint8* dst_rgb, int pix) {
    224   const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
    225   uint8 b = src_bayer0[1];
    226   for (int x = 0; x < (pix - 2); x += 2) {
    227     dst_rgb[0] = AVG(b, src_bayer0[1]);
    228     dst_rgb[1] = src_bayer0[0];
    229     dst_rgb[2] = src_bayer1[0];
    230     dst_rgb[3] = 255U;
    231     dst_rgb[4] = src_bayer0[1];
    232     dst_rgb[5] = AVG(src_bayer0[0], src_bayer0[2]);
    233     dst_rgb[6] = AVG(src_bayer1[0], src_bayer1[2]);
    234     dst_rgb[7] = 255U;
    235     b = src_bayer0[1];
    236     src_bayer0 += 2;
    237     src_bayer1 += 2;
    238     dst_rgb += 8;
    239   }
    240   dst_rgb[0] = AVG(b, src_bayer0[1]);
    241   dst_rgb[1] = src_bayer0[0];
    242   dst_rgb[2] = src_bayer1[0];
    243   dst_rgb[3] = 255U;
    244   dst_rgb[4] = src_bayer0[1];
    245   dst_rgb[5] = src_bayer0[0];
    246   dst_rgb[6] = src_bayer1[0];
    247   dst_rgb[7] = 255U;
    248 }
    249 
    250 static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer,
    251                        uint8* dst_rgb, int pix) {
    252   const uint8* src_bayer1 = src_bayer0 + src_stride_bayer;
    253   uint8 r = src_bayer0[1];
    254   for (int x = 0; x < (pix - 2); x += 2) {
    255     dst_rgb[0] = src_bayer1[0];
    256     dst_rgb[1] = src_bayer0[0];
    257     dst_rgb[2] = AVG(r, src_bayer0[1]);
    258     dst_rgb[3] = 255U;
    259     dst_rgb[4] = AVG(src_bayer1[0], src_bayer1[2]);
    260     dst_rgb[5] = AVG(src_bayer0[0], src_bayer0[2]);
    261     dst_rgb[6] = src_bayer0[1];
    262     dst_rgb[7] = 255U;
    263     r = src_bayer0[1];
    264     src_bayer0 += 2;
    265     src_bayer1 += 2;
    266     dst_rgb += 8;
    267   }
    268   dst_rgb[0] = src_bayer1[0];
    269   dst_rgb[1] = src_bayer0[0];
    270   dst_rgb[2] = AVG(r, src_bayer0[1]);
    271   dst_rgb[3] = 255U;
    272   dst_rgb[4] = src_bayer1[0];
    273   dst_rgb[5] = src_bayer0[0];
    274   dst_rgb[6] = src_bayer0[1];
    275   dst_rgb[7] = 255U;
    276 }
    277 
    278 // Converts any Bayer RGB format to ARGB.
    279 int BayerRGBToARGB(const uint8* src_bayer, int src_stride_bayer,
    280                    uint32 src_fourcc_bayer,
    281                    uint8* dst_rgb, int dst_stride_rgb,
    282                    int width, int height) {
    283   if (height < 0) {
    284     height = -height;
    285     dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb;
    286     dst_stride_rgb = -dst_stride_rgb;
    287   }
    288   void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
    289                     uint8* dst_rgb, int pix);
    290   void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
    291                     uint8* dst_rgb, int pix);
    292 
    293   switch (src_fourcc_bayer) {
    294     default:
    295       assert(false);
    296     case FOURCC_RGGB:
    297       BayerRow0 = BayerRowRG;
    298       BayerRow1 = BayerRowGB;
    299       break;
    300     case FOURCC_BGGR:
    301       BayerRow0 = BayerRowBG;
    302       BayerRow1 = BayerRowGR;
    303       break;
    304     case FOURCC_GRBG:
    305       BayerRow0 = BayerRowGR;
    306       BayerRow1 = BayerRowBG;
    307       break;
    308     case FOURCC_GBRG:
    309       BayerRow0 = BayerRowGB;
    310       BayerRow1 = BayerRowRG;
    311       break;
    312   }
    313 
    314   for (int y = 0; y < (height - 1); y += 2) {
    315     BayerRow0(src_bayer, src_stride_bayer, dst_rgb, width);
    316     BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
    317         dst_rgb + dst_stride_rgb, width);
    318     src_bayer += src_stride_bayer * 2;
    319     dst_rgb += dst_stride_rgb * 2;
    320   }
    321   if (height & 1) {
    322     BayerRow0(src_bayer, -src_stride_bayer, dst_rgb, width);
    323   }
    324   return 0;
    325 }
    326 
    327 // Converts any Bayer RGB format to ARGB.
    328 int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer,
    329                    uint32 src_fourcc_bayer,
    330                    uint8* dst_y, int dst_stride_y,
    331                    uint8* dst_u, int dst_stride_u,
    332                    uint8* dst_v, int dst_stride_v,
    333                    int width, int height) {
    334   if (width * 4 > kMaxStride) {
    335     return -1;
    336   }
    337   // Negative height means invert the image.
    338   if (height < 0) {
    339     height = -height;
    340     int halfheight = (height + 1) >> 1;
    341     dst_y = dst_y + (height - 1) * dst_stride_y;
    342     dst_u = dst_u + (halfheight - 1) * dst_stride_u;
    343     dst_v = dst_v + (halfheight - 1) * dst_stride_v;
    344     dst_stride_y = -dst_stride_y;
    345     dst_stride_u = -dst_stride_u;
    346     dst_stride_v = -dst_stride_v;
    347   }
    348   void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer,
    349                     uint8* dst_rgb, int pix);
    350   void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer,
    351                     uint8* dst_rgb, int pix);
    352   void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix);
    353   void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
    354                       uint8* dst_u, uint8* dst_v, int width);
    355   SIMD_ALIGNED(uint8 row[kMaxStride * 2]);
    356 
    357 #if defined(HAS_ARGBTOYROW_SSSE3)
    358   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    359       (width % 16 == 0) &&
    360       IS_ALIGNED(row, 16) && (kMaxStride % 16 == 0) &&
    361       IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) {
    362     ARGBToYRow = ARGBToYRow_SSSE3;
    363   } else
    364 #endif
    365   {
    366     ARGBToYRow = ARGBToYRow_C;
    367   }
    368 #if defined(HAS_ARGBTOUVROW_SSSE3)
    369   if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) &&
    370       (width % 16 == 0) &&
    371       IS_ALIGNED(row, 16) && (kMaxStride % 16 == 0) &&
    372       IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) &&
    373       IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) {
    374     ARGBToUVRow = ARGBToUVRow_SSSE3;
    375   } else
    376 #endif
    377   {
    378     ARGBToUVRow = ARGBToUVRow_C;
    379   }
    380 
    381   switch (src_fourcc_bayer) {
    382     default:
    383       assert(false);
    384     case FOURCC_RGGB:
    385       BayerRow0 = BayerRowRG;
    386       BayerRow1 = BayerRowGB;
    387       break;
    388     case FOURCC_BGGR:
    389       BayerRow0 = BayerRowBG;
    390       BayerRow1 = BayerRowGR;
    391       break;
    392     case FOURCC_GRBG:
    393       BayerRow0 = BayerRowGR;
    394       BayerRow1 = BayerRowBG;
    395       break;
    396     case FOURCC_GBRG:
    397       BayerRow0 = BayerRowGB;
    398       BayerRow1 = BayerRowRG;
    399       break;
    400   }
    401 
    402   for (int y = 0; y < (height - 1); y += 2) {
    403     BayerRow0(src_bayer, src_stride_bayer, row, width);
    404     BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer,
    405               row + kMaxStride, width);
    406     ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width);
    407     ARGBToYRow(row, dst_y, width);
    408     ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width);
    409     src_bayer += src_stride_bayer * 2;
    410     dst_y += dst_stride_y * 2;
    411     dst_u += dst_stride_u;
    412     dst_v += dst_stride_v;
    413   }
    414   // TODO(fbarchard): Make sure this filters properly
    415   if (height & 1) {
    416     BayerRow0(src_bayer, src_stride_bayer, row, width);
    417     ARGBToUVRow(row, 0, dst_u, dst_v, width);
    418     ARGBToYRow(row, dst_y, width);
    419   }
    420   return 0;
    421 }
    422 
    423 }  // namespace libyuv
    424