1 /************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 * 26 **************************************************************************/ 27 28 29 /** 30 * @file 31 * YUV pixel format manipulation. 32 * 33 * @author Jose Fonseca <jfonseca (at) vmware.com> 34 */ 35 36 37 #include "util/u_format.h" 38 #include "util/u_cpu_detect.h" 39 40 #include "lp_bld_arit.h" 41 #include "lp_bld_type.h" 42 #include "lp_bld_const.h" 43 #include "lp_bld_conv.h" 44 #include "lp_bld_gather.h" 45 #include "lp_bld_format.h" 46 #include "lp_bld_init.h" 47 #include "lp_bld_logic.h" 48 49 /** 50 * Extract Y, U, V channels from packed UYVY. 51 * @param packed is a <n x i32> vector with the packed UYVY blocks 52 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 53 */ 54 static void 55 uyvy_to_yuv_soa(struct gallivm_state *gallivm, 56 unsigned n, 57 LLVMValueRef packed, 58 LLVMValueRef i, 59 LLVMValueRef *y, 60 LLVMValueRef *u, 61 LLVMValueRef *v) 62 { 63 LLVMBuilderRef builder = gallivm->builder; 64 struct lp_type type; 65 LLVMValueRef mask; 66 67 memset(&type, 0, sizeof type); 68 type.width = 32; 69 type.length = n; 70 71 assert(lp_check_value(type, packed)); 72 assert(lp_check_value(type, i)); 73 74 /* 75 * y = (uyvy >> (16*i + 8)) & 0xff 76 * u = (uyvy ) & 0xff 77 * v = (uyvy >> 16 ) & 0xff 78 */ 79 80 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 81 /* 82 * Avoid shift with per-element count. 83 * No support on x86, gets translated to roughly 5 instructions 84 * per element. Didn't measure performance but cuts shader size 85 * by quite a bit (less difference if cpu has no sse4.1 support). 86 */ 87 if (util_cpu_caps.has_sse2 && n > 1) { 88 LLVMValueRef sel, tmp, tmp2; 89 struct lp_build_context bld32; 90 91 lp_build_context_init(&bld32, gallivm, type); 92 93 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); 94 tmp2 = LLVMBuildLShr(builder, tmp, lp_build_const_int_vec(gallivm, type, 16), ""); 95 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); 96 *y = lp_build_select(&bld32, sel, tmp, tmp2); 97 } else 98 #endif 99 { 100 LLVMValueRef shift; 101 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); 102 shift = LLVMBuildAdd(builder, shift, lp_build_const_int_vec(gallivm, type, 8), ""); 103 *y = LLVMBuildLShr(builder, packed, shift, ""); 104 } 105 106 *u = packed; 107 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); 108 109 mask = lp_build_const_int_vec(gallivm, type, 0xff); 110 111 *y = LLVMBuildAnd(builder, *y, mask, "y"); 112 *u = LLVMBuildAnd(builder, *u, mask, "u"); 113 *v = LLVMBuildAnd(builder, *v, mask, "v"); 114 } 115 116 117 /** 118 * Extract Y, U, V channels from packed YUYV. 119 * @param packed is a <n x i32> vector with the packed YUYV blocks 120 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 121 */ 122 static void 123 yuyv_to_yuv_soa(struct gallivm_state *gallivm, 124 unsigned n, 125 LLVMValueRef packed, 126 LLVMValueRef i, 127 LLVMValueRef *y, 128 LLVMValueRef *u, 129 LLVMValueRef *v) 130 { 131 LLVMBuilderRef builder = gallivm->builder; 132 struct lp_type type; 133 LLVMValueRef mask; 134 135 memset(&type, 0, sizeof type); 136 type.width = 32; 137 type.length = n; 138 139 assert(lp_check_value(type, packed)); 140 assert(lp_check_value(type, i)); 141 142 /* 143 * y = (yuyv >> 16*i) & 0xff 144 * u = (yuyv >> 8 ) & 0xff 145 * v = (yuyv >> 24 ) & 0xff 146 */ 147 148 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 149 /* 150 * Avoid shift with per-element count. 151 * No support on x86, gets translated to roughly 5 instructions 152 * per element. Didn't measure performance but cuts shader size 153 * by quite a bit (less difference if cpu has no sse4.1 support). 154 */ 155 if (util_cpu_caps.has_sse2 && n > 1) { 156 LLVMValueRef sel, tmp; 157 struct lp_build_context bld32; 158 159 lp_build_context_init(&bld32, gallivm, type); 160 161 tmp = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 16), ""); 162 sel = lp_build_compare(gallivm, type, PIPE_FUNC_EQUAL, i, lp_build_const_int_vec(gallivm, type, 0)); 163 *y = lp_build_select(&bld32, sel, packed, tmp); 164 } else 165 #endif 166 { 167 LLVMValueRef shift; 168 shift = LLVMBuildMul(builder, i, lp_build_const_int_vec(gallivm, type, 16), ""); 169 *y = LLVMBuildLShr(builder, packed, shift, ""); 170 } 171 172 *u = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 8), ""); 173 *v = LLVMBuildLShr(builder, packed, lp_build_const_int_vec(gallivm, type, 24), ""); 174 175 mask = lp_build_const_int_vec(gallivm, type, 0xff); 176 177 *y = LLVMBuildAnd(builder, *y, mask, "y"); 178 *u = LLVMBuildAnd(builder, *u, mask, "u"); 179 *v = LLVMBuildAnd(builder, *v, mask, "v"); 180 } 181 182 183 static INLINE void 184 yuv_to_rgb_soa(struct gallivm_state *gallivm, 185 unsigned n, 186 LLVMValueRef y, LLVMValueRef u, LLVMValueRef v, 187 LLVMValueRef *r, LLVMValueRef *g, LLVMValueRef *b) 188 { 189 LLVMBuilderRef builder = gallivm->builder; 190 struct lp_type type; 191 struct lp_build_context bld; 192 193 LLVMValueRef c0; 194 LLVMValueRef c8; 195 LLVMValueRef c16; 196 LLVMValueRef c128; 197 LLVMValueRef c255; 198 199 LLVMValueRef cy; 200 LLVMValueRef cug; 201 LLVMValueRef cub; 202 LLVMValueRef cvr; 203 LLVMValueRef cvg; 204 205 memset(&type, 0, sizeof type); 206 type.sign = TRUE; 207 type.width = 32; 208 type.length = n; 209 210 lp_build_context_init(&bld, gallivm, type); 211 212 assert(lp_check_value(type, y)); 213 assert(lp_check_value(type, u)); 214 assert(lp_check_value(type, v)); 215 216 /* 217 * Constants 218 */ 219 220 c0 = lp_build_const_int_vec(gallivm, type, 0); 221 c8 = lp_build_const_int_vec(gallivm, type, 8); 222 c16 = lp_build_const_int_vec(gallivm, type, 16); 223 c128 = lp_build_const_int_vec(gallivm, type, 128); 224 c255 = lp_build_const_int_vec(gallivm, type, 255); 225 226 cy = lp_build_const_int_vec(gallivm, type, 298); 227 cug = lp_build_const_int_vec(gallivm, type, -100); 228 cub = lp_build_const_int_vec(gallivm, type, 516); 229 cvr = lp_build_const_int_vec(gallivm, type, 409); 230 cvg = lp_build_const_int_vec(gallivm, type, -208); 231 232 /* 233 * y -= 16; 234 * u -= 128; 235 * v -= 128; 236 */ 237 238 y = LLVMBuildSub(builder, y, c16, ""); 239 u = LLVMBuildSub(builder, u, c128, ""); 240 v = LLVMBuildSub(builder, v, c128, ""); 241 242 /* 243 * r = 298 * _y + 409 * _v + 128; 244 * g = 298 * _y - 100 * _u - 208 * _v + 128; 245 * b = 298 * _y + 516 * _u + 128; 246 */ 247 248 y = LLVMBuildMul(builder, y, cy, ""); 249 y = LLVMBuildAdd(builder, y, c128, ""); 250 251 *r = LLVMBuildMul(builder, v, cvr, ""); 252 *g = LLVMBuildAdd(builder, 253 LLVMBuildMul(builder, u, cug, ""), 254 LLVMBuildMul(builder, v, cvg, ""), 255 ""); 256 *b = LLVMBuildMul(builder, u, cub, ""); 257 258 *r = LLVMBuildAdd(builder, *r, y, ""); 259 *g = LLVMBuildAdd(builder, *g, y, ""); 260 *b = LLVMBuildAdd(builder, *b, y, ""); 261 262 /* 263 * r >>= 8; 264 * g >>= 8; 265 * b >>= 8; 266 */ 267 268 *r = LLVMBuildAShr(builder, *r, c8, "r"); 269 *g = LLVMBuildAShr(builder, *g, c8, "g"); 270 *b = LLVMBuildAShr(builder, *b, c8, "b"); 271 272 /* 273 * Clamp 274 */ 275 276 *r = lp_build_clamp(&bld, *r, c0, c255); 277 *g = lp_build_clamp(&bld, *g, c0, c255); 278 *b = lp_build_clamp(&bld, *b, c0, c255); 279 } 280 281 282 static LLVMValueRef 283 rgb_to_rgba_aos(struct gallivm_state *gallivm, 284 unsigned n, 285 LLVMValueRef r, LLVMValueRef g, LLVMValueRef b) 286 { 287 LLVMBuilderRef builder = gallivm->builder; 288 struct lp_type type; 289 LLVMValueRef a; 290 LLVMValueRef rgba; 291 292 memset(&type, 0, sizeof type); 293 type.sign = TRUE; 294 type.width = 32; 295 type.length = n; 296 297 assert(lp_check_value(type, r)); 298 assert(lp_check_value(type, g)); 299 assert(lp_check_value(type, b)); 300 301 /* 302 * Make a 4 x unorm8 vector 303 */ 304 305 r = r; 306 g = LLVMBuildShl(builder, g, lp_build_const_int_vec(gallivm, type, 8), ""); 307 b = LLVMBuildShl(builder, b, lp_build_const_int_vec(gallivm, type, 16), ""); 308 a = lp_build_const_int_vec(gallivm, type, 0xff000000); 309 310 rgba = r; 311 rgba = LLVMBuildOr(builder, rgba, g, ""); 312 rgba = LLVMBuildOr(builder, rgba, b, ""); 313 rgba = LLVMBuildOr(builder, rgba, a, ""); 314 315 rgba = LLVMBuildBitCast(builder, rgba, 316 LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n), ""); 317 318 return rgba; 319 } 320 321 322 /** 323 * Convert from <n x i32> packed UYVY to <4n x i8> RGBA AoS 324 */ 325 static LLVMValueRef 326 uyvy_to_rgba_aos(struct gallivm_state *gallivm, 327 unsigned n, 328 LLVMValueRef packed, 329 LLVMValueRef i) 330 { 331 LLVMValueRef y, u, v; 332 LLVMValueRef r, g, b; 333 LLVMValueRef rgba; 334 335 uyvy_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); 336 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); 337 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 338 339 return rgba; 340 } 341 342 343 /** 344 * Convert from <n x i32> packed YUYV to <4n x i8> RGBA AoS 345 */ 346 static LLVMValueRef 347 yuyv_to_rgba_aos(struct gallivm_state *gallivm, 348 unsigned n, 349 LLVMValueRef packed, 350 LLVMValueRef i) 351 { 352 LLVMValueRef y, u, v; 353 LLVMValueRef r, g, b; 354 LLVMValueRef rgba; 355 356 yuyv_to_yuv_soa(gallivm, n, packed, i, &y, &u, &v); 357 yuv_to_rgb_soa(gallivm, n, y, u, v, &r, &g, &b); 358 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 359 360 return rgba; 361 } 362 363 364 /** 365 * Convert from <n x i32> packed RG_BG to <4n x i8> RGBA AoS 366 */ 367 static LLVMValueRef 368 rgbg_to_rgba_aos(struct gallivm_state *gallivm, 369 unsigned n, 370 LLVMValueRef packed, 371 LLVMValueRef i) 372 { 373 LLVMValueRef r, g, b; 374 LLVMValueRef rgba; 375 376 uyvy_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); 377 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 378 379 return rgba; 380 } 381 382 383 /** 384 * Convert from <n x i32> packed GR_GB to <4n x i8> RGBA AoS 385 */ 386 static LLVMValueRef 387 grgb_to_rgba_aos(struct gallivm_state *gallivm, 388 unsigned n, 389 LLVMValueRef packed, 390 LLVMValueRef i) 391 { 392 LLVMValueRef r, g, b; 393 LLVMValueRef rgba; 394 395 yuyv_to_yuv_soa(gallivm, n, packed, i, &g, &r, &b); 396 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 397 398 return rgba; 399 } 400 401 /** 402 * Convert from <n x i32> packed GR_BR to <4n x i8> RGBA AoS 403 */ 404 static LLVMValueRef 405 grbr_to_rgba_aos(struct gallivm_state *gallivm, 406 unsigned n, 407 LLVMValueRef packed, 408 LLVMValueRef i) 409 { 410 LLVMValueRef r, g, b; 411 LLVMValueRef rgba; 412 413 uyvy_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); 414 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 415 416 return rgba; 417 } 418 419 420 /** 421 * Convert from <n x i32> packed RG_RB to <4n x i8> RGBA AoS 422 */ 423 static LLVMValueRef 424 rgrb_to_rgba_aos(struct gallivm_state *gallivm, 425 unsigned n, 426 LLVMValueRef packed, 427 LLVMValueRef i) 428 { 429 LLVMValueRef r, g, b; 430 LLVMValueRef rgba; 431 432 yuyv_to_yuv_soa(gallivm, n, packed, i, &r, &g, &b); 433 rgba = rgb_to_rgba_aos(gallivm, n, r, g, b); 434 435 return rgba; 436 } 437 438 /** 439 * @param n is the number of pixels processed 440 * @param packed is a <n x i32> vector with the packed YUYV blocks 441 * @param i is a <n x i32> vector with the x pixel coordinate (0 or 1) 442 * @return a <4*n x i8> vector with the pixel RGBA values in AoS 443 */ 444 LLVMValueRef 445 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm, 446 const struct util_format_description *format_desc, 447 unsigned n, 448 LLVMValueRef base_ptr, 449 LLVMValueRef offset, 450 LLVMValueRef i, 451 LLVMValueRef j) 452 { 453 LLVMValueRef packed; 454 LLVMValueRef rgba; 455 456 assert(format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED); 457 assert(format_desc->block.bits == 32); 458 assert(format_desc->block.width == 2); 459 assert(format_desc->block.height == 1); 460 461 packed = lp_build_gather(gallivm, n, 32, 32, base_ptr, offset); 462 463 (void)j; 464 465 switch (format_desc->format) { 466 case PIPE_FORMAT_UYVY: 467 rgba = uyvy_to_rgba_aos(gallivm, n, packed, i); 468 break; 469 case PIPE_FORMAT_YUYV: 470 rgba = yuyv_to_rgba_aos(gallivm, n, packed, i); 471 break; 472 case PIPE_FORMAT_R8G8_B8G8_UNORM: 473 rgba = rgbg_to_rgba_aos(gallivm, n, packed, i); 474 break; 475 case PIPE_FORMAT_G8R8_G8B8_UNORM: 476 rgba = grgb_to_rgba_aos(gallivm, n, packed, i); 477 break; 478 case PIPE_FORMAT_G8R8_B8R8_UNORM: 479 rgba = grbr_to_rgba_aos(gallivm, n, packed, i); 480 break; 481 case PIPE_FORMAT_R8G8_R8B8_UNORM: 482 rgba = rgrb_to_rgba_aos(gallivm, n, packed, i); 483 break; 484 default: 485 assert(0); 486 rgba = LLVMGetUndef(LLVMVectorType(LLVMInt8TypeInContext(gallivm->context), 4*n)); 487 break; 488 } 489 490 return rgba; 491 } 492 493