1 /************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * @file 30 * AoS pixel format manipulation. 31 * 32 * @author Jose Fonseca <jfonseca (at) vmware.com> 33 */ 34 35 36 #include "util/u_format.h" 37 #include "util/u_memory.h" 38 #include "util/u_math.h" 39 #include "util/u_pointer.h" 40 #include "util/u_string.h" 41 42 #include "lp_bld_arit.h" 43 #include "lp_bld_init.h" 44 #include "lp_bld_type.h" 45 #include "lp_bld_flow.h" 46 #include "lp_bld_const.h" 47 #include "lp_bld_conv.h" 48 #include "lp_bld_swizzle.h" 49 #include "lp_bld_gather.h" 50 #include "lp_bld_debug.h" 51 #include "lp_bld_format.h" 52 53 54 /** 55 * Basic swizzling. Rearrange the order of the unswizzled array elements 56 * according to the format description. PIPE_SWIZZLE_ZERO/ONE are supported 57 * too. 58 * Ex: if unswizzled[4] = {B, G, R, x}, then swizzled_out[4] = {R, G, B, 1}. 59 */ 60 LLVMValueRef 61 lp_build_format_swizzle_aos(const struct util_format_description *desc, 62 struct lp_build_context *bld, 63 LLVMValueRef unswizzled) 64 { 65 unsigned char swizzles[4]; 66 unsigned chan; 67 68 assert(bld->type.length % 4 == 0); 69 70 for (chan = 0; chan < 4; ++chan) { 71 enum util_format_swizzle swizzle; 72 73 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 74 /* 75 * For ZS formats do RGBA = ZZZ1 76 */ 77 if (chan == 3) { 78 swizzle = UTIL_FORMAT_SWIZZLE_1; 79 } else if (desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_NONE) { 80 swizzle = UTIL_FORMAT_SWIZZLE_0; 81 } else { 82 swizzle = desc->swizzle[0]; 83 } 84 } else { 85 swizzle = desc->swizzle[chan]; 86 } 87 swizzles[chan] = swizzle; 88 } 89 90 return lp_build_swizzle_aos(bld, unswizzled, swizzles); 91 } 92 93 94 /** 95 * Whether the format matches the vector type, apart of swizzles. 96 */ 97 static INLINE boolean 98 format_matches_type(const struct util_format_description *desc, 99 struct lp_type type) 100 { 101 enum util_format_type chan_type; 102 unsigned chan; 103 104 assert(type.length % 4 == 0); 105 106 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN || 107 desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB || 108 desc->block.width != 1 || 109 desc->block.height != 1) { 110 return FALSE; 111 } 112 113 if (type.floating) { 114 chan_type = UTIL_FORMAT_TYPE_FLOAT; 115 } else if (type.fixed) { 116 chan_type = UTIL_FORMAT_TYPE_FIXED; 117 } else if (type.sign) { 118 chan_type = UTIL_FORMAT_TYPE_SIGNED; 119 } else { 120 chan_type = UTIL_FORMAT_TYPE_UNSIGNED; 121 } 122 123 for (chan = 0; chan < desc->nr_channels; ++chan) { 124 if (desc->channel[chan].size != type.width) { 125 return FALSE; 126 } 127 128 if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID) { 129 if (desc->channel[chan].type != chan_type || 130 desc->channel[chan].normalized != type.norm) { 131 return FALSE; 132 } 133 } 134 } 135 136 return TRUE; 137 } 138 139 140 /** 141 * Unpack a single pixel into its RGBA components. 142 * 143 * @param desc the pixel format for the packed pixel value 144 * @param packed integer pixel in a format such as PIPE_FORMAT_B8G8R8A8_UNORM 145 * 146 * @return RGBA in a float[4] or ubyte[4] or ushort[4] vector. 147 */ 148 static INLINE LLVMValueRef 149 lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm, 150 const struct util_format_description *desc, 151 LLVMValueRef packed) 152 { 153 LLVMBuilderRef builder = gallivm->builder; 154 LLVMValueRef shifted, casted, scaled, masked; 155 LLVMValueRef shifts[4]; 156 LLVMValueRef masks[4]; 157 LLVMValueRef scales[4]; 158 159 boolean normalized; 160 boolean needs_uitofp; 161 unsigned shift; 162 unsigned i; 163 164 /* TODO: Support more formats */ 165 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 166 assert(desc->block.width == 1); 167 assert(desc->block.height == 1); 168 assert(desc->block.bits <= 32); 169 170 /* Do the intermediate integer computations with 32bit integers since it 171 * matches floating point size */ 172 assert (LLVMTypeOf(packed) == LLVMInt32TypeInContext(gallivm->context)); 173 174 /* Broadcast the packed value to all four channels 175 * before: packed = BGRA 176 * after: packed = {BGRA, BGRA, BGRA, BGRA} 177 */ 178 packed = LLVMBuildInsertElement(builder, 179 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 180 packed, 181 LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)), 182 ""); 183 packed = LLVMBuildShuffleVector(builder, 184 packed, 185 LLVMGetUndef(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 186 LLVMConstNull(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4)), 187 ""); 188 189 /* Initialize vector constants */ 190 normalized = FALSE; 191 needs_uitofp = FALSE; 192 shift = 0; 193 194 /* Loop over 4 color components */ 195 for (i = 0; i < 4; ++i) { 196 unsigned bits = desc->channel[i].size; 197 198 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 199 shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 200 masks[i] = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); 201 scales[i] = LLVMConstNull(LLVMFloatTypeInContext(gallivm->context)); 202 } 203 else { 204 unsigned long long mask = (1ULL << bits) - 1; 205 206 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 207 208 if (bits == 32) { 209 needs_uitofp = TRUE; 210 } 211 212 shifts[i] = lp_build_const_int32(gallivm, shift); 213 masks[i] = lp_build_const_int32(gallivm, mask); 214 215 if (desc->channel[i].normalized) { 216 scales[i] = lp_build_const_float(gallivm, 1.0 / mask); 217 normalized = TRUE; 218 } 219 else 220 scales[i] = lp_build_const_float(gallivm, 1.0); 221 } 222 223 shift += bits; 224 } 225 226 /* Ex: convert packed = {BGRA, BGRA, BGRA, BGRA} 227 * into masked = {B, G, R, A} 228 */ 229 shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); 230 masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); 231 232 233 if (!needs_uitofp) { 234 /* UIToFP can't be expressed in SSE2 */ 235 casted = LLVMBuildSIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); 236 } else { 237 casted = LLVMBuildUIToFP(builder, masked, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), ""); 238 } 239 240 /* At this point 'casted' may be a vector of floats such as 241 * {255.0, 255.0, 255.0, 255.0}. Next, if the pixel values are normalized 242 * we'll scale this to {1.0, 1.0, 1.0, 1.0}. 243 */ 244 245 if (normalized) 246 scaled = LLVMBuildFMul(builder, casted, LLVMConstVector(scales, 4), ""); 247 else 248 scaled = casted; 249 250 return scaled; 251 } 252 253 254 /** 255 * Pack a single pixel. 256 * 257 * @param rgba 4 float vector with the unpacked components. 258 * 259 * XXX: This is mostly for reference and testing -- operating a single pixel at 260 * a time is rarely if ever needed. 261 */ 262 LLVMValueRef 263 lp_build_pack_rgba_aos(struct gallivm_state *gallivm, 264 const struct util_format_description *desc, 265 LLVMValueRef rgba) 266 { 267 LLVMBuilderRef builder = gallivm->builder; 268 LLVMTypeRef type; 269 LLVMValueRef packed = NULL; 270 LLVMValueRef swizzles[4]; 271 LLVMValueRef shifted, casted, scaled, unswizzled; 272 LLVMValueRef shifts[4]; 273 LLVMValueRef scales[4]; 274 boolean normalized; 275 unsigned shift; 276 unsigned i, j; 277 278 assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); 279 assert(desc->block.width == 1); 280 assert(desc->block.height == 1); 281 282 type = LLVMIntTypeInContext(gallivm->context, desc->block.bits); 283 284 /* Unswizzle the color components into the source vector. */ 285 for (i = 0; i < 4; ++i) { 286 for (j = 0; j < 4; ++j) { 287 if (desc->swizzle[j] == i) 288 break; 289 } 290 if (j < 4) 291 swizzles[i] = lp_build_const_int32(gallivm, j); 292 else 293 swizzles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 294 } 295 296 unswizzled = LLVMBuildShuffleVector(builder, rgba, 297 LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4)), 298 LLVMConstVector(swizzles, 4), ""); 299 300 normalized = FALSE; 301 shift = 0; 302 for (i = 0; i < 4; ++i) { 303 unsigned bits = desc->channel[i].size; 304 305 if (desc->channel[i].type == UTIL_FORMAT_TYPE_VOID) { 306 shifts[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 307 scales[i] = LLVMGetUndef(LLVMFloatTypeInContext(gallivm->context)); 308 } 309 else { 310 unsigned mask = (1 << bits) - 1; 311 312 assert(desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED); 313 assert(bits < 32); 314 315 shifts[i] = lp_build_const_int32(gallivm, shift); 316 317 if (desc->channel[i].normalized) { 318 scales[i] = lp_build_const_float(gallivm, mask); 319 normalized = TRUE; 320 } 321 else 322 scales[i] = lp_build_const_float(gallivm, 1.0); 323 } 324 325 shift += bits; 326 } 327 328 if (normalized) 329 scaled = LLVMBuildFMul(builder, unswizzled, LLVMConstVector(scales, 4), ""); 330 else 331 scaled = unswizzled; 332 333 casted = LLVMBuildFPToSI(builder, scaled, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), ""); 334 335 shifted = LLVMBuildShl(builder, casted, LLVMConstVector(shifts, 4), ""); 336 337 /* Bitwise or all components */ 338 for (i = 0; i < 4; ++i) { 339 if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 340 LLVMValueRef component = LLVMBuildExtractElement(builder, shifted, 341 lp_build_const_int32(gallivm, i), ""); 342 if (packed) 343 packed = LLVMBuildOr(builder, packed, component, ""); 344 else 345 packed = component; 346 } 347 } 348 349 if (!packed) 350 packed = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 351 352 if (desc->block.bits < 32) 353 packed = LLVMBuildTrunc(builder, packed, type, ""); 354 355 return packed; 356 } 357 358 359 360 361 /** 362 * Fetch a pixel into a 4 float AoS. 363 * 364 * \param format_desc describes format of the image we're fetching from 365 * \param ptr address of the pixel block (or the texel if uncompressed) 366 * \param i, j the sub-block pixel coordinates. For non-compressed formats 367 * these will always be (0, 0). 368 * \return a 4 element vector with the pixel's RGBA values. 369 */ 370 LLVMValueRef 371 lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, 372 const struct util_format_description *format_desc, 373 struct lp_type type, 374 LLVMValueRef base_ptr, 375 LLVMValueRef offset, 376 LLVMValueRef i, 377 LLVMValueRef j) 378 { 379 LLVMBuilderRef builder = gallivm->builder; 380 unsigned num_pixels = type.length / 4; 381 struct lp_build_context bld; 382 383 assert(type.length <= LP_MAX_VECTOR_LENGTH); 384 assert(type.length % 4 == 0); 385 386 lp_build_context_init(&bld, gallivm, type); 387 388 /* 389 * Trivial case 390 * 391 * The format matches the type (apart of a swizzle) so no need for 392 * scaling or converting. 393 */ 394 395 if (format_matches_type(format_desc, type) && 396 format_desc->block.bits <= type.width * 4 && 397 util_is_power_of_two(format_desc->block.bits)) { 398 LLVMValueRef packed; 399 400 /* 401 * The format matches the type (apart of a swizzle) so no need for 402 * scaling or converting. 403 */ 404 405 packed = lp_build_gather(gallivm, type.length/4, 406 format_desc->block.bits, type.width*4, 407 base_ptr, offset); 408 409 assert(format_desc->block.bits <= type.width * type.length); 410 411 packed = LLVMBuildBitCast(gallivm->builder, packed, 412 lp_build_vec_type(gallivm, type), ""); 413 414 return lp_build_format_swizzle_aos(format_desc, &bld, packed); 415 } 416 417 /* 418 * Bit arithmetic 419 */ 420 421 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 422 (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB || 423 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) && 424 format_desc->block.width == 1 && 425 format_desc->block.height == 1 && 426 util_is_power_of_two(format_desc->block.bits) && 427 format_desc->block.bits <= 32 && 428 format_desc->is_bitmask && 429 !format_desc->is_mixed && 430 (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED || 431 format_desc->channel[1].type == UTIL_FORMAT_TYPE_UNSIGNED)) { 432 433 LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; 434 LLVMValueRef res; 435 unsigned k; 436 437 /* 438 * Unpack a pixel at a time into a <4 x float> RGBA vector 439 */ 440 441 for (k = 0; k < num_pixels; ++k) { 442 LLVMValueRef packed; 443 444 packed = lp_build_gather_elem(gallivm, num_pixels, 445 format_desc->block.bits, 32, 446 base_ptr, offset, k); 447 448 tmps[k] = lp_build_unpack_arith_rgba_aos(gallivm, 449 format_desc, 450 packed); 451 } 452 453 /* 454 * Type conversion. 455 * 456 * TODO: We could avoid floating conversion for integer to 457 * integer conversions. 458 */ 459 460 if (gallivm_debug & GALLIVM_DEBUG_PERF && !type.floating) { 461 debug_printf("%s: unpacking %s with floating point\n", 462 __FUNCTION__, format_desc->short_name); 463 } 464 465 lp_build_conv(gallivm, 466 lp_float32_vec4_type(), 467 type, 468 tmps, num_pixels, &res, 1); 469 470 return lp_build_format_swizzle_aos(format_desc, &bld, res); 471 } 472 473 /* If all channels are of same type and we are not using half-floats */ 474 if (util_format_is_array(format_desc)) { 475 return lp_build_fetch_rgba_aos_array(gallivm, format_desc, type, base_ptr, offset); 476 } 477 478 /* 479 * YUV / subsampled formats 480 */ 481 482 if (format_desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 483 struct lp_type tmp_type; 484 LLVMValueRef tmp; 485 486 memset(&tmp_type, 0, sizeof tmp_type); 487 tmp_type.width = 8; 488 tmp_type.length = num_pixels * 4; 489 tmp_type.norm = TRUE; 490 491 tmp = lp_build_fetch_subsampled_rgba_aos(gallivm, 492 format_desc, 493 num_pixels, 494 base_ptr, 495 offset, 496 i, j); 497 498 lp_build_conv(gallivm, 499 tmp_type, type, 500 &tmp, 1, &tmp, 1); 501 502 return tmp; 503 } 504 505 /* 506 * Fallback to util_format_description::fetch_rgba_8unorm(). 507 */ 508 509 if (format_desc->fetch_rgba_8unorm && 510 !type.floating && type.width == 8 && !type.sign && type.norm) { 511 /* 512 * Fallback to calling util_format_description::fetch_rgba_8unorm. 513 * 514 * This is definitely not the most efficient way of fetching pixels, as 515 * we miss the opportunity to do vectorization, but this it is a 516 * convenient for formats or scenarios for which there was no opportunity 517 * or incentive to optimize. 518 */ 519 520 LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context); 521 LLVMTypeRef pi8t = LLVMPointerType(i8t, 0); 522 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 523 LLVMValueRef function; 524 LLVMValueRef tmp_ptr; 525 LLVMValueRef tmp; 526 LLVMValueRef res; 527 unsigned k; 528 529 if (gallivm_debug & GALLIVM_DEBUG_PERF) { 530 debug_printf("%s: falling back to util_format_%s_fetch_rgba_8unorm\n", 531 __FUNCTION__, format_desc->short_name); 532 } 533 534 /* 535 * Declare and bind format_desc->fetch_rgba_8unorm(). 536 */ 537 538 { 539 /* 540 * Function to call looks like: 541 * fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j) 542 */ 543 LLVMTypeRef ret_type; 544 LLVMTypeRef arg_types[4]; 545 LLVMTypeRef function_type; 546 547 ret_type = LLVMVoidTypeInContext(gallivm->context); 548 arg_types[0] = pi8t; 549 arg_types[1] = pi8t; 550 arg_types[2] = i32t; 551 arg_types[3] = i32t; 552 function_type = LLVMFunctionType(ret_type, arg_types, 553 Elements(arg_types), 0); 554 555 /* make const pointer for the C fetch_rgba_8unorm function */ 556 function = lp_build_const_int_pointer(gallivm, 557 func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm)); 558 559 /* cast the callee pointer to the function's type */ 560 function = LLVMBuildBitCast(builder, function, 561 LLVMPointerType(function_type, 0), 562 "cast callee"); 563 } 564 565 tmp_ptr = lp_build_alloca(gallivm, i32t, ""); 566 567 res = LLVMGetUndef(LLVMVectorType(i32t, num_pixels)); 568 569 /* 570 * Invoke format_desc->fetch_rgba_8unorm() for each pixel and insert the result 571 * in the SoA vectors. 572 */ 573 574 for (k = 0; k < num_pixels; ++k) { 575 LLVMValueRef index = lp_build_const_int32(gallivm, k); 576 LLVMValueRef args[4]; 577 578 args[0] = LLVMBuildBitCast(builder, tmp_ptr, pi8t, ""); 579 args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, 580 base_ptr, offset, k); 581 582 if (num_pixels == 1) { 583 args[2] = i; 584 args[3] = j; 585 } 586 else { 587 args[2] = LLVMBuildExtractElement(builder, i, index, ""); 588 args[3] = LLVMBuildExtractElement(builder, j, index, ""); 589 } 590 591 LLVMBuildCall(builder, function, args, Elements(args), ""); 592 593 tmp = LLVMBuildLoad(builder, tmp_ptr, ""); 594 595 if (num_pixels == 1) { 596 res = tmp; 597 } 598 else { 599 res = LLVMBuildInsertElement(builder, res, tmp, index, ""); 600 } 601 } 602 603 /* Bitcast from <n x i32> to <4n x i8> */ 604 res = LLVMBuildBitCast(builder, res, bld.vec_type, ""); 605 606 return res; 607 } 608 609 /* 610 * Fallback to util_format_description::fetch_rgba_float(). 611 */ 612 613 if (format_desc->fetch_rgba_float) { 614 /* 615 * Fallback to calling util_format_description::fetch_rgba_float. 616 * 617 * This is definitely not the most efficient way of fetching pixels, as 618 * we miss the opportunity to do vectorization, but this it is a 619 * convenient for formats or scenarios for which there was no opportunity 620 * or incentive to optimize. 621 */ 622 623 LLVMTypeRef f32t = LLVMFloatTypeInContext(gallivm->context); 624 LLVMTypeRef f32x4t = LLVMVectorType(f32t, 4); 625 LLVMTypeRef pf32t = LLVMPointerType(f32t, 0); 626 LLVMTypeRef pi8t = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); 627 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 628 LLVMValueRef function; 629 LLVMValueRef tmp_ptr; 630 LLVMValueRef tmps[LP_MAX_VECTOR_LENGTH/4]; 631 LLVMValueRef res; 632 unsigned k; 633 634 if (gallivm_debug & GALLIVM_DEBUG_PERF) { 635 debug_printf("%s: falling back to util_format_%s_fetch_rgba_float\n", 636 __FUNCTION__, format_desc->short_name); 637 } 638 639 /* 640 * Declare and bind format_desc->fetch_rgba_float(). 641 */ 642 643 { 644 /* 645 * Function to call looks like: 646 * fetch(float *dst, const uint8_t *src, unsigned i, unsigned j) 647 */ 648 LLVMTypeRef ret_type; 649 LLVMTypeRef arg_types[4]; 650 651 ret_type = LLVMVoidTypeInContext(gallivm->context); 652 arg_types[0] = pf32t; 653 arg_types[1] = pi8t; 654 arg_types[2] = i32t; 655 arg_types[3] = i32t; 656 657 function = lp_build_const_func_pointer(gallivm, 658 func_to_pointer((func_pointer) format_desc->fetch_rgba_float), 659 ret_type, 660 arg_types, Elements(arg_types), 661 format_desc->short_name); 662 } 663 664 tmp_ptr = lp_build_alloca(gallivm, f32x4t, ""); 665 666 /* 667 * Invoke format_desc->fetch_rgba_float() for each pixel and insert the result 668 * in the SoA vectors. 669 */ 670 671 for (k = 0; k < num_pixels; ++k) { 672 LLVMValueRef args[4]; 673 674 args[0] = LLVMBuildBitCast(builder, tmp_ptr, pf32t, ""); 675 args[1] = lp_build_gather_elem_ptr(gallivm, num_pixels, 676 base_ptr, offset, k); 677 678 if (num_pixels == 1) { 679 args[2] = i; 680 args[3] = j; 681 } 682 else { 683 LLVMValueRef index = lp_build_const_int32(gallivm, k); 684 args[2] = LLVMBuildExtractElement(builder, i, index, ""); 685 args[3] = LLVMBuildExtractElement(builder, j, index, ""); 686 } 687 688 LLVMBuildCall(builder, function, args, Elements(args), ""); 689 690 tmps[k] = LLVMBuildLoad(builder, tmp_ptr, ""); 691 } 692 693 lp_build_conv(gallivm, 694 lp_float32_vec4_type(), 695 type, 696 tmps, num_pixels, &res, 1); 697 698 return res; 699 } 700 701 assert(0); 702 return lp_build_undef(gallivm, type); 703 } 704