1 /* 2 * Copyright (c) 2011 The LibYuv project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <assert.h> 12 13 #include "libyuv/cpu_id.h" 14 #include "video_common.h" 15 #include "row.h" 16 17 #define kMaxStride (2048 * 4) 18 19 namespace libyuv { 20 21 // Note: to do this with Neon vld4.8 would load ARGB values into 4 registers 22 // and vst would select which 2 components to write. The low level would need 23 // to be ARGBToBG, ARGBToGB, ARGBToRG, ARGBToGR 24 25 #if defined(WIN32) && !defined(COVERAGE_ENABLED) 26 #define HAS_ARGBTOBAYERROW_SSSE3 27 __declspec(naked) 28 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, 29 uint8* dst_bayer, uint32 selector, int pix) { 30 __asm { 31 mov eax, [esp + 4] // src_argb 32 mov edx, [esp + 8] // dst_bayer 33 movd xmm7, [esp + 12] // selector 34 mov ecx, [esp + 16] // pix 35 pshufd xmm7, xmm7, 0 36 37 wloop: 38 movdqa xmm0, [eax] 39 lea eax, [eax + 16] 40 pshufb xmm0, xmm7 41 movd [edx], xmm0 42 lea edx, [edx + 4] 43 sub ecx, 4 44 ja wloop 45 ret 46 } 47 } 48 49 #elif (defined(__x86_64__) || defined(__i386__)) && \ 50 !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR) 51 52 #define HAS_ARGBTOBAYERROW_SSSE3 53 static void ARGBToBayerRow_SSSE3(const uint8* src_argb, uint8* dst_bayer, 54 uint32 selector, int pix) { 55 asm volatile( 56 "movd %3,%%xmm7\n" 57 "pshufd $0x0,%%xmm7,%%xmm7\n" 58 "1:" 59 "movdqa (%0),%%xmm0\n" 60 "lea 0x10(%0),%0\n" 61 "pshufb %%xmm7,%%xmm0\n" 62 "movd %%xmm0,(%1)\n" 63 "lea 0x4(%1),%1\n" 64 "sub $0x4,%2\n" 65 "ja 1b\n" 66 : "+r"(src_argb), // %0 67 "+r"(dst_bayer), // %1 68 "+r"(pix) // %2 69 : "r"(selector) // %3 70 : "memory" 71 ); 72 } 73 #endif 74 75 static void ARGBToBayerRow_C(const uint8* src_argb, 76 uint8* dst_bayer, uint32 selector, int pix) { 77 int index0 = selector & 0xff; 78 int index1 = (selector >> 8) & 0xff; 79 // Copy a row of Bayer. 80 for (int x = 0; x < (pix - 1); x += 2) { 81 dst_bayer[0] = src_argb[index0]; 82 dst_bayer[1] = src_argb[index1]; 83 src_argb += 8; 84 dst_bayer += 2; 85 } 86 if (pix & 1) { 87 dst_bayer[0] = src_argb[index0]; 88 } 89 } 90 91 // generate a selector mask useful for pshufb 92 static uint32 GenerateSelector(int select0, int select1) { 93 return static_cast<uint32>(select0) | 94 static_cast<uint32>((select1 + 4) << 8) | 95 static_cast<uint32>((select0 + 8) << 16) | 96 static_cast<uint32>((select1 + 12) << 24); 97 } 98 99 // Converts 32 bit ARGB to any Bayer RGB format. 100 int ARGBToBayerRGB(const uint8* src_rgb, int src_stride_rgb, 101 uint8* dst_bayer, int dst_stride_bayer, 102 uint32 dst_fourcc_bayer, 103 int width, int height) { 104 if (height < 0) { 105 height = -height; 106 src_rgb = src_rgb + (height - 1) * src_stride_rgb; 107 src_stride_rgb = -src_stride_rgb; 108 } 109 void (*ARGBToBayerRow)(const uint8* src_argb, 110 uint8* dst_bayer, uint32 selector, int pix); 111 #if defined(HAS_ARGBTOBAYERROW_SSSE3) 112 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) && 113 (width % 4 == 0) && 114 IS_ALIGNED(src_rgb, 16) && (src_stride_rgb % 16 == 0) && 115 IS_ALIGNED(dst_bayer, 4) && (dst_stride_bayer % 4 == 0)) { 116 ARGBToBayerRow = ARGBToBayerRow_SSSE3; 117 } else 118 #endif 119 { 120 ARGBToBayerRow = ARGBToBayerRow_C; 121 } 122 123 int blue_index = 0; 124 int green_index = 1; 125 int red_index = 2; 126 127 // Now build a lookup table containing the indices for the four pixels in each 128 // 2x2 Bayer grid. 129 uint32 index_map[2]; 130 switch (dst_fourcc_bayer) { 131 default: 132 assert(false); 133 case FOURCC_RGGB: 134 index_map[0] = GenerateSelector(red_index, green_index); 135 index_map[1] = GenerateSelector(green_index, blue_index); 136 break; 137 case FOURCC_BGGR: 138 index_map[0] = GenerateSelector(blue_index, green_index); 139 index_map[1] = GenerateSelector(green_index, red_index); 140 break; 141 case FOURCC_GRBG: 142 index_map[0] = GenerateSelector(green_index, red_index); 143 index_map[1] = GenerateSelector(blue_index, green_index); 144 break; 145 case FOURCC_GBRG: 146 index_map[0] = GenerateSelector(green_index, blue_index); 147 index_map[1] = GenerateSelector(red_index, green_index); 148 break; 149 } 150 151 // Now convert. 152 for (int y = 0; y < height; ++y) { 153 ARGBToBayerRow(src_rgb, dst_bayer, index_map[y & 1], width); 154 src_rgb += src_stride_rgb; 155 dst_bayer += dst_stride_bayer; 156 } 157 return 0; 158 } 159 160 #define AVG(a,b) (((a) + (b)) >> 1) 161 162 static void BayerRowBG(const uint8* src_bayer0, int src_stride_bayer, 163 uint8* dst_rgb, int pix) { 164 const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; 165 uint8 g = src_bayer0[1]; 166 uint8 r = src_bayer1[1]; 167 for (int x = 0; x < (pix - 2); x += 2) { 168 dst_rgb[0] = src_bayer0[0]; 169 dst_rgb[1] = AVG(g, src_bayer0[1]); 170 dst_rgb[2] = AVG(r, src_bayer1[1]); 171 dst_rgb[3] = 255U; 172 dst_rgb[4] = AVG(src_bayer0[0], src_bayer0[2]); 173 dst_rgb[5] = src_bayer0[1]; 174 dst_rgb[6] = src_bayer1[1]; 175 dst_rgb[7] = 255U; 176 g = src_bayer0[1]; 177 r = src_bayer1[1]; 178 src_bayer0 += 2; 179 src_bayer1 += 2; 180 dst_rgb += 8; 181 } 182 dst_rgb[0] = src_bayer0[0]; 183 dst_rgb[1] = AVG(g, src_bayer0[1]); 184 dst_rgb[2] = AVG(r, src_bayer1[1]); 185 dst_rgb[3] = 255U; 186 dst_rgb[4] = src_bayer0[0]; 187 dst_rgb[5] = src_bayer0[1]; 188 dst_rgb[6] = src_bayer1[1]; 189 dst_rgb[7] = 255U; 190 } 191 192 static void BayerRowRG(const uint8* src_bayer0, int src_stride_bayer, 193 uint8* dst_rgb, int pix) { 194 const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; 195 uint8 g = src_bayer0[1]; 196 uint8 b = src_bayer1[1]; 197 for (int x = 0; x < (pix - 2); x += 2) { 198 dst_rgb[0] = AVG(b, src_bayer1[1]); 199 dst_rgb[1] = AVG(g, src_bayer0[1]); 200 dst_rgb[2] = src_bayer0[0]; 201 dst_rgb[3] = 255U; 202 dst_rgb[4] = src_bayer1[1]; 203 dst_rgb[5] = src_bayer0[1]; 204 dst_rgb[6] = AVG(src_bayer0[0], src_bayer0[2]); 205 dst_rgb[7] = 255U; 206 g = src_bayer0[1]; 207 b = src_bayer1[1]; 208 src_bayer0 += 2; 209 src_bayer1 += 2; 210 dst_rgb += 8; 211 } 212 dst_rgb[0] = AVG(b, src_bayer1[1]); 213 dst_rgb[1] = AVG(g, src_bayer0[1]); 214 dst_rgb[2] = src_bayer0[0]; 215 dst_rgb[3] = 255U; 216 dst_rgb[4] = src_bayer1[1]; 217 dst_rgb[5] = src_bayer0[1]; 218 dst_rgb[6] = src_bayer0[0]; 219 dst_rgb[7] = 255U; 220 } 221 222 static void BayerRowGB(const uint8* src_bayer0, int src_stride_bayer, 223 uint8* dst_rgb, int pix) { 224 const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; 225 uint8 b = src_bayer0[1]; 226 for (int x = 0; x < (pix - 2); x += 2) { 227 dst_rgb[0] = AVG(b, src_bayer0[1]); 228 dst_rgb[1] = src_bayer0[0]; 229 dst_rgb[2] = src_bayer1[0]; 230 dst_rgb[3] = 255U; 231 dst_rgb[4] = src_bayer0[1]; 232 dst_rgb[5] = AVG(src_bayer0[0], src_bayer0[2]); 233 dst_rgb[6] = AVG(src_bayer1[0], src_bayer1[2]); 234 dst_rgb[7] = 255U; 235 b = src_bayer0[1]; 236 src_bayer0 += 2; 237 src_bayer1 += 2; 238 dst_rgb += 8; 239 } 240 dst_rgb[0] = AVG(b, src_bayer0[1]); 241 dst_rgb[1] = src_bayer0[0]; 242 dst_rgb[2] = src_bayer1[0]; 243 dst_rgb[3] = 255U; 244 dst_rgb[4] = src_bayer0[1]; 245 dst_rgb[5] = src_bayer0[0]; 246 dst_rgb[6] = src_bayer1[0]; 247 dst_rgb[7] = 255U; 248 } 249 250 static void BayerRowGR(const uint8* src_bayer0, int src_stride_bayer, 251 uint8* dst_rgb, int pix) { 252 const uint8* src_bayer1 = src_bayer0 + src_stride_bayer; 253 uint8 r = src_bayer0[1]; 254 for (int x = 0; x < (pix - 2); x += 2) { 255 dst_rgb[0] = src_bayer1[0]; 256 dst_rgb[1] = src_bayer0[0]; 257 dst_rgb[2] = AVG(r, src_bayer0[1]); 258 dst_rgb[3] = 255U; 259 dst_rgb[4] = AVG(src_bayer1[0], src_bayer1[2]); 260 dst_rgb[5] = AVG(src_bayer0[0], src_bayer0[2]); 261 dst_rgb[6] = src_bayer0[1]; 262 dst_rgb[7] = 255U; 263 r = src_bayer0[1]; 264 src_bayer0 += 2; 265 src_bayer1 += 2; 266 dst_rgb += 8; 267 } 268 dst_rgb[0] = src_bayer1[0]; 269 dst_rgb[1] = src_bayer0[0]; 270 dst_rgb[2] = AVG(r, src_bayer0[1]); 271 dst_rgb[3] = 255U; 272 dst_rgb[4] = src_bayer1[0]; 273 dst_rgb[5] = src_bayer0[0]; 274 dst_rgb[6] = src_bayer0[1]; 275 dst_rgb[7] = 255U; 276 } 277 278 // Converts any Bayer RGB format to ARGB. 279 int BayerRGBToARGB(const uint8* src_bayer, int src_stride_bayer, 280 uint32 src_fourcc_bayer, 281 uint8* dst_rgb, int dst_stride_rgb, 282 int width, int height) { 283 if (height < 0) { 284 height = -height; 285 dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb; 286 dst_stride_rgb = -dst_stride_rgb; 287 } 288 void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer, 289 uint8* dst_rgb, int pix); 290 void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer, 291 uint8* dst_rgb, int pix); 292 293 switch (src_fourcc_bayer) { 294 default: 295 assert(false); 296 case FOURCC_RGGB: 297 BayerRow0 = BayerRowRG; 298 BayerRow1 = BayerRowGB; 299 break; 300 case FOURCC_BGGR: 301 BayerRow0 = BayerRowBG; 302 BayerRow1 = BayerRowGR; 303 break; 304 case FOURCC_GRBG: 305 BayerRow0 = BayerRowGR; 306 BayerRow1 = BayerRowBG; 307 break; 308 case FOURCC_GBRG: 309 BayerRow0 = BayerRowGB; 310 BayerRow1 = BayerRowRG; 311 break; 312 } 313 314 for (int y = 0; y < (height - 1); y += 2) { 315 BayerRow0(src_bayer, src_stride_bayer, dst_rgb, width); 316 BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer, 317 dst_rgb + dst_stride_rgb, width); 318 src_bayer += src_stride_bayer * 2; 319 dst_rgb += dst_stride_rgb * 2; 320 } 321 if (height & 1) { 322 BayerRow0(src_bayer, -src_stride_bayer, dst_rgb, width); 323 } 324 return 0; 325 } 326 327 // Converts any Bayer RGB format to ARGB. 328 int BayerRGBToI420(const uint8* src_bayer, int src_stride_bayer, 329 uint32 src_fourcc_bayer, 330 uint8* dst_y, int dst_stride_y, 331 uint8* dst_u, int dst_stride_u, 332 uint8* dst_v, int dst_stride_v, 333 int width, int height) { 334 if (width * 4 > kMaxStride) { 335 return -1; 336 } 337 // Negative height means invert the image. 338 if (height < 0) { 339 height = -height; 340 int halfheight = (height + 1) >> 1; 341 dst_y = dst_y + (height - 1) * dst_stride_y; 342 dst_u = dst_u + (halfheight - 1) * dst_stride_u; 343 dst_v = dst_v + (halfheight - 1) * dst_stride_v; 344 dst_stride_y = -dst_stride_y; 345 dst_stride_u = -dst_stride_u; 346 dst_stride_v = -dst_stride_v; 347 } 348 void (*BayerRow0)(const uint8* src_bayer, int src_stride_bayer, 349 uint8* dst_rgb, int pix); 350 void (*BayerRow1)(const uint8* src_bayer, int src_stride_bayer, 351 uint8* dst_rgb, int pix); 352 void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix); 353 void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb, 354 uint8* dst_u, uint8* dst_v, int width); 355 SIMD_ALIGNED(uint8 row[kMaxStride * 2]); 356 357 #if defined(HAS_ARGBTOYROW_SSSE3) 358 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) && 359 (width % 16 == 0) && 360 IS_ALIGNED(row, 16) && (kMaxStride % 16 == 0) && 361 IS_ALIGNED(dst_y, 16) && (dst_stride_y % 16 == 0)) { 362 ARGBToYRow = ARGBToYRow_SSSE3; 363 } else 364 #endif 365 { 366 ARGBToYRow = ARGBToYRow_C; 367 } 368 #if defined(HAS_ARGBTOUVROW_SSSE3) 369 if (libyuv::TestCpuFlag(libyuv::kCpuHasSSSE3) && 370 (width % 16 == 0) && 371 IS_ALIGNED(row, 16) && (kMaxStride % 16 == 0) && 372 IS_ALIGNED(dst_u, 8) && (dst_stride_u % 8 == 0) && 373 IS_ALIGNED(dst_v, 8) && (dst_stride_v % 8 == 0)) { 374 ARGBToUVRow = ARGBToUVRow_SSSE3; 375 } else 376 #endif 377 { 378 ARGBToUVRow = ARGBToUVRow_C; 379 } 380 381 switch (src_fourcc_bayer) { 382 default: 383 assert(false); 384 case FOURCC_RGGB: 385 BayerRow0 = BayerRowRG; 386 BayerRow1 = BayerRowGB; 387 break; 388 case FOURCC_BGGR: 389 BayerRow0 = BayerRowBG; 390 BayerRow1 = BayerRowGR; 391 break; 392 case FOURCC_GRBG: 393 BayerRow0 = BayerRowGR; 394 BayerRow1 = BayerRowBG; 395 break; 396 case FOURCC_GBRG: 397 BayerRow0 = BayerRowGB; 398 BayerRow1 = BayerRowRG; 399 break; 400 } 401 402 for (int y = 0; y < (height - 1); y += 2) { 403 BayerRow0(src_bayer, src_stride_bayer, row, width); 404 BayerRow1(src_bayer + src_stride_bayer, -src_stride_bayer, 405 row + kMaxStride, width); 406 ARGBToUVRow(row, kMaxStride, dst_u, dst_v, width); 407 ARGBToYRow(row, dst_y, width); 408 ARGBToYRow(row + kMaxStride, dst_y + dst_stride_y, width); 409 src_bayer += src_stride_bayer * 2; 410 dst_y += dst_stride_y * 2; 411 dst_u += dst_stride_u; 412 dst_v += dst_stride_v; 413 } 414 // TODO(fbarchard): Make sure this filters properly 415 if (height & 1) { 416 BayerRow0(src_bayer, src_stride_bayer, row, width); 417 ARGBToUVRow(row, 0, dst_u, dst_v, width); 418 ARGBToYRow(row, dst_y, width); 419 } 420 return 0; 421 } 422 423 } // namespace libyuv 424