1 /************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 * 26 **************************************************************************/ 27 28 29 /** 30 * @file 31 * YUV pixel format manipulation. 32 * 33 * @author Jose Fonseca <jfonseca (at) vmware.com> 34 */ 35 36 37 #include "util/u_format.h" 38 #include "util/u_cpu_detect.h" 39 40 #include "lp_bld_arit.h" 41 #include "lp_bld_type.h" 42 #include "lp_bld_const.h" 43 #include "lp_bld_conv.h" 44 #include "lp_bld_gather.h" 45 #include "lp_bld_format.h" 46 #include "lp_bld_init.h" 47 #include "lp_bld_logic.h" 48 49 /** 50 * Extract Y, U, V channels from packed UYVY. 51 * @param packed is a <n x i32> vector with the packed UYVY blocks 52 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 53 */ 54 static void 55 uyvy_to_yuv_soa(struct gallivm_state *gallivm, 56 unsigned n, 57 LLVMValueRef packed, 58 LLVMValueRef i, 59 LLVMValueRef *y, 60 LLVMValueRef *u, 61 LLVMValueRef *v) 62 { 63 LLVMBuilderRef builder = gallivm->builder; 64 struct lp_type type; 65 LLVMValueRef mask; 66 67 memset(&type, 0, sizeof type); 68 type.width = 32; 69 type.length = n; 70 71 assert(lp_check_value(type, packed)); 72 assert(lp_check_value(type, i)); 73 74 /* 75 * Little endian: 76 * y = (uyvy >> (16*i + 8)) & 0xff 77 * u = (uyvy ) & 0xff 78 * v = (uyvy >> 16 ) & 0xff 79 * 80 * Big endian: 81 * y = (uyvy >> (-16*i + 16)) & 0xff 82 * u = (uyvy >> 24) & 0xff 83 * v = (uyvy >> 8) & 0xff 84 */ 85 86 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 87 /* 88 * Avoid shift with per-element count. 89 * No support on x86, gets translated to roughly 5 instructions 90 * per element. Didn't measure performance but cuts shader size 91 * by quite a bit (less difference if cpu has no sse4.1 support). 92 */ 93 if (util_cpu_caps.has_sse2 && n > 1) { 94 LLVMValueRef sel, tmp, tmp2; 95 struct lp_build_context bld32; 96 97 lp_build_context_init(&bld32, gallivm, type); 98 99 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); 100 tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), ""); 101 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); 102 *y = lp_build_select(&bld32, sel, tmp, tmp2); 103 } else 104 #endif 105 { 106 LLVMValueRef shift; 107 #ifdef PIPE_ARCH_LITTLE_ENDIAN 108 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); 109 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), ""); 110 #else 111 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), ""); 112 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 16), ""); 113 #endif 114 *y = LLVMBuildLShr(builder, packed, shift, ""); 115 } 116 117 #ifdef PIPE_ARCH_LITTLE_ENDIAN 118 *u = packed; 119 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); 120 #else 121 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), ""); 122 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); 123 #endif 124 125 mask = lp_build_const_int_vec(gallivm, type, 0xff); 126 127 *y = LLVMBuildAnd(builder, *y, mask, "y"); 128 *u = LLVMBuildAnd(builder, *u, mask, "u"); 129 *v = LLVMBuildAnd(builder, *v, mask, "v"); 130 } 131 132 133 /** 134 * Extract Y, U, V channels from packed YUYV. 135 * @param packed is a <n x i32> vector with the packed YUYV blocks 136 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 137 */ 138 static void 139 yuyv_to_yuv_soa(struct gallivm_state *gallivm, 140 unsigned n, 141 LLVMValueRef packed, 142 LLVMValueRef i, 143 LLVMValueRef *y, 144 LLVMValueRef *u, 145 LLVMValueRef *v) 146 { 147 LLVMBuilderRef builder = gallivm->builder; 148 struct lp_type type; 149 LLVMValueRef mask; 150 151 memset(&type, 0, sizeof type); 152 type.width = 32; 153 type.length = n; 154 155 assert(lp_check_value(type, packed)); 156 assert(lp_check_value(type, i)); 157 158 /* 159 * Little endian: 160 * y = (yuyv >> 16*i) & 0xff 161 * u = (yuyv >> 8 ) & 0xff 162 * v = (yuyv >> 24 ) & 0xff 163 * 164 * Big endian: 165 * y = (yuyv >> (-16*i + 24) & 0xff 166 * u = (yuyv >> 16) & 0xff 167 * v = (yuyv) & 0xff 168 */ 169 170 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 171 /* 172 * Avoid shift with per-element count. 173 * No support on x86, gets translated to roughly 5 instructions 174 * per element. Didn't measure performance but cuts shader size 175 * by quite a bit (less difference if cpu has no sse4.1 support). 176 */ 177 if (util_cpu_caps.has_sse2 && n > 1) { 178 LLVMValueRef sel, tmp; 179 struct lp_build_context bld32; 180 181 lp_build_context_init(&bld32, gallivm, type); 182 183 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); 184 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); 185 *y = lp_build_select(&bld32, sel, packed, tmp); 186 } else 187 #endif 188 { 189 LLVMValueRef shift; 190 #ifdef PIPE_ARCH_LITTLE_ENDIAN 191 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); 192 #else 193 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, -16), ""); 194 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 24), ""); 195 #endif 196 *y = LLVMBuildLShr(builder, packed, shift, ""); 197 } 198 199 #ifdef PIPE_ARCH_LITTLE_ENDIAN 200 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); 201 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), ""); 202 #else 203 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); 204 *v = packed; 205 #endif 206 207 mask = lp_build_const_int_vec(gallivm, type, 0xff); 208 209 *y = LLVMBuildAnd(builder, *y, mask, "y"); 210 *u = LLVMBuildAnd(builder, *u, mask, "u"); 211 *v = LLVMBuildAnd(builder, *v, mask, "v"); 212 } 213 214 215 static inline void 216 yuv_to_rgb_soa(struct gallivm_state *gallivm, 217 unsigned n, 218 LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, 219 LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) 220 { 221 LLVMBuilderRef builder = gallivm->builder; 222 struct lp_type type; 223 struct lp_build_context bld; 224 225 LLVMValueRef c0; 226 LLVMValueRef c8; 227 LLVMValueRef c16; 228 LLVMValueRef c128; 229 LLVMValueRef c255; 230 231 LLVMValueRef cy; 232 LLVMValueRef cug; 233 LLVMValueRef cub; 234 LLVMValueRef cvr; 235 LLVMValueRef cvg; 236 237 memset(&type, 0, sizeof type); 238 type.sign = TRUE; 239 type.width = 32; 240 type.length = n; 241 242 lp_build_context_init(&bld, gallivm, type); 243 244 assert(lp_check_value(type, y)); 245 assert(lp_check_value(type, u)); 246 assert(lp_check_value(type, v)); 247 248 /* 249 * Constants 250 */ 251 252 c0 = lp_build_const_int_vec(gallivm, type, 0); 253 c8 = lp_build_const_int_vec(gallivm, type, 8); 254 c16 = lp_build_const_int_vec(gallivm, type, 16); 255 c128 = lp_build_const_int_vec(gallivm, type, 128); 256 c255 = lp_build_const_int_vec(gallivm, type, 255); 257 258 cy = lp_build_const_int_vec(gallivm, type, 298); 259 cug = lp_build_const_int_vec(gallivm, type, -100); 260 cub = lp_build_const_int_vec(gallivm, type, 516); 261 cvr = lp_build_const_int_vec(gallivm, type, 409); 262 cvg = lp_build_const_int_vec(gallivm, type, -208); 263 264 /* 265 * y -= 16; 266 * u -= 128; 267 * v -= 128; 268 */ 269 270 y = LLVMBuildSub(builder, y, c16, ""); 271 u = LLVMBuildSub(builder, u, c128, ""); 272 v = LLVMBuildSub(builder, v, c128, ""); 273 274 /* 275 * r = 298 * _y + 409 * _v + 128; 276 * g = 298 * _y - 100 * _u - 208 * _v + 128; 277 * b = 298 * _y + 516 * _u + 128; 278 */ 279 280 y = LLVMBuildMul(builder, y, cy, ""); 281 y = LLVMBuildAdd(builder, y, c128, ""); 282 283 *r = LLVMBuildMul(builder, v, cvr, ""); 284 *g = LLVMBuildAdd(builder, 285 LLVMBuildMul(builder, u, cug, ""), 286 LLVMBuildMul(builder, v, cvg, ""), 287 ""); 288 *b = LLVMBuildMul(builder, u, cub, ""); 289 290 *r = LLVMBuildAdd(builder, *r, y, ""); 291 *g = LLVMBuildAdd(builder, *g, y, ""); 292 *b = LLVMBuildAdd(builder, *b, y, ""); 293 294 /* 295 * r >>= 8; 296 * g >>= 8; 297 * b >>= 8; 298 */ 299 300 *r = LLVMBuildAShr(builder, *r, c8, "r"); 301 *g = LLVMBuildAShr(builder, *g, c8, "g"); 302 *b = LLVMBuildAShr(builder, *b, c8, "b"); 303 304 /* 305 * Clamp 306 */ 307 308 *r = lp_build_clamp(&bld, *r, c0, c255); 309 *g = lp_build_clamp(&bld, *g, c0, c255); 310 *b = lp_build_clamp(&bld, *b, c0, c255); 311 } 312 313 314 static LLVMValueRef 315 rgb_to_rgba_aos(struct gallivm_state *gallivm, 316 unsigned n, 317 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) 318 { 319 LLVMBuilderRef builder = gallivm->builder; 320 struct lp_type type; 321 LLVMValueRef a; 322 LLVMValueRef rgba; 323 324 memset(&type, 0, sizeof type); 325 type.sign = TRUE; 326 type.width = 32; 327 type.length = n; 328 329 assert(lp_check_value(type, r)); 330 assert(lp_check_value(type, g)); 331 assert(lp_check_value(type, b)); 332 333 /* 334 * Make a 4 x unorm8 vector 335 */ 336 337 #ifdef PIPE_ARCH_LITTLE_ENDIAN 338 r = r; 339 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), ""); 340 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), ""); 341 a = lp_build_const_int_vec(gallivm, type, 0xff000000); 342 #else 343 r = LLVMBuildShl(builder, r, lp_build_const_int_vec(gallivm, type, 24), ""); 344 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 16), ""); 345 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 8), ""); 346 a = lp_build_const_int_vec(gallivm, type, 0x000000ff); 347 #endif 348 349 rgba = r; 350 rgba = LLVMBuildOr(builder, rgba, g, ""); 351 rgba = LLVMBuildOr(builder, rgba, b, ""); 352 rgba = LLVMBuildOr(builder, rgba, a, ""); 353 354 rgba = LLVMBuildBitCast(builder, rgba, 355 LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), ""); 356 357 return rgba; 358 } 359 360 361 /** 362 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS 363 */ 364 static LLVMValueRef 365 uyvy_to_rgba_aos(struct gallivm_state *gallivm, 366 unsigned n, 367 LLVMValueRef packed, 368 LLVMValueRef i) 369 { 370 LLVMValueRef y, u, v; 371 LLVMValueRef r, g, b; 372 LLVMValueRef rgba; 373 374 uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); 375 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); 376 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 377 378 return rgba; 379 } 380 381 382 /** 383 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS 384 */ 385 static LLVMValueRef 386 yuyv_to_rgba_aos(struct gallivm_state *gallivm, 387 unsigned n, 388 LLVMValueRef packed, 389 LLVMValueRef i) 390 { 391 LLVMValueRef y, u, v; 392 LLVMValueRef r, g, b; 393 LLVMValueRef rgba; 394 395 yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); 396 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); 397 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 398 399 return rgba; 400 } 401 402 403 /** 404 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS 405 */ 406 static LLVMValueRef 407 rgbg_to_rgba_aos(struct gallivm_state *gallivm, 408 unsigned n, 409 LLVMValueRef packed, 410 LLVMValueRef i) 411 { 412 LLVMValueRef r, g, b; 413 LLVMValueRef rgba; 414 415 uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); 416 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 417 418 return rgba; 419 } 420 421 422 /** 423 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS 424 */ 425 static LLVMValueRef 426 grgb_to_rgba_aos(struct gallivm_state *gallivm, 427 unsigned n, 428 LLVMValueRef packed, 429 LLVMValueRef i) 430 { 431 LLVMValueRef r, g, b; 432 LLVMValueRef rgba; 433 434 yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); 435 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 436 437 return rgba; 438 } 439 440 /** 441 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS 442 */ 443 static LLVMValueRef 444 grbr_to_rgba_aos(struct gallivm_state *gallivm, 445 unsigned n, 446 LLVMValueRef packed, 447 LLVMValueRef i) 448 { 449 LLVMValueRef r, g, b; 450 LLVMValueRef rgba; 451 452 uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); 453 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 454 455 return rgba; 456 } 457 458 459 /** 460 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS 461 */ 462 static LLVMValueRef 463 rgrb_to_rgba_aos(struct gallivm_state *gallivm, 464 unsigned n, 465 LLVMValueRef packed, 466 LLVMValueRef i) 467 { 468 LLVMValueRef r, g, b; 469 LLVMValueRef rgba; 470 471 yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); 472 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 473 474 return rgba; 475 } 476 477 /** 478 * @param n is the number of pixels processed 479 * @param packed is a <n x i32> vector with the packed YUYV blocks 480 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 481 * @return a <4*n x i8> vector with the pixel RGBA values in AoS 482 */ 483 LLVMValueRef 484 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, 485 const struct util_format_description *format_desc, 486 unsigned n, 487 LLVMValueRef base_ptr, 488 LLVMValueRef offset, 489 LLVMValueRef i, 490 LLVMValueRef j) 491 { 492 LLVMValueRef packed; 493 LLVMValueRef rgba; 494 struct lp_type fetch_type; 495 496 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED); 497 assert(format_desc->block.bits == 32); 498 assert(format_desc->block.width == 2); 499 assert(format_desc->block.height == 1); 500 501 fetch_type = lp_type_uint(32); 502 packed = lp_build_gather(gallivm, n, 32, fetch_type, TRUE, base_ptr, offset, FALSE); 503 504 (void)j; 505 506 switch (format_desc->format) { 507 case PIPE_FORMAT_UYVY: 508 rgba = uyvy_to_rgba_aos(gallivm, n, packed, i); 509 break; 510 case PIPE_FORMAT_YUYV: 511 rgba = yuyv_to_rgba_aos(gallivm, n, packed, i); 512 break; 513 case PIPE_FORMAT_R8G8_B8G8_UNORM: 514 rgba = rgbg_to_rgba_aos(gallivm, n, packed, i); 515 break; 516 case PIPE_FORMAT_G8R8_G8B8_UNORM: 517 rgba = grgb_to_rgba_aos(gallivm, n, packed, i); 518 break; 519 case PIPE_FORMAT_G8R8_B8R8_UNORM: 520 rgba = grbr_to_rgba_aos(gallivm, n, packed, i); 521 break; 522 case PIPE_FORMAT_R8G8_R8B8_UNORM: 523 rgba = rgrb_to_rgba_aos(gallivm, n, packed, i); 524 break; 525 default: 526 assert(0); 527 rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n)); 528 break; 529 } 530 531 return rgba; 532 } 533 534