1 /************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * @file 30 * Helper functions for swizzling/shuffling. 31 * 32 * @author Jose Fonseca <jfonseca (at) vmware.com> 33 */ 34 35 #include <inttypes.h> /* for PRIx64 macro */ 36 #include "util/u_debug.h" 37 38 #include "lp_bld_type.h" 39 #include "lp_bld_const.h" 40 #include "lp_bld_init.h" 41 #include "lp_bld_logic.h" 42 #include "lp_bld_swizzle.h" 43 #include "lp_bld_pack.h" 44 45 46 LLVMValueRef 47 lp_build_broadcast(struct gallivm_state *gallivm, 48 LLVMTypeRef vec_type, 49 LLVMValueRef scalar) 50 { 51 LLVMValueRef res; 52 53 if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) { 54 /* scalar */ 55 assert(vec_type == LLVMTypeOf(scalar)); 56 res = scalar; 57 } else { 58 LLVMBuilderRef builder = gallivm->builder; 59 const unsigned length = LLVMGetVectorSize(vec_type); 60 LLVMValueRef undef = LLVMGetUndef(vec_type); 61 /* The shuffle vector is always made of int32 elements */ 62 LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context); 63 LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length); 64 65 assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar)); 66 67 res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), ""); 68 res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), ""); 69 } 70 71 return res; 72 } 73 74 75 /** 76 * Broadcast 77 */ 78 LLVMValueRef 79 lp_build_broadcast_scalar(struct lp_build_context *bld, 80 LLVMValueRef scalar) 81 { 82 assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar))); 83 84 return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar); 85 } 86 87 88 /** 89 * Combined extract and broadcast (mere shuffle in most cases) 90 */ 91 LLVMValueRef 92 lp_build_extract_broadcast(struct gallivm_state *gallivm, 93 struct lp_type src_type, 94 struct lp_type dst_type, 95 LLVMValueRef vector, 96 LLVMValueRef index) 97 { 98 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 99 LLVMValueRef res; 100 101 assert(src_type.floating == dst_type.floating); 102 assert(src_type.width == dst_type.width); 103 104 assert(lp_check_value(src_type, vector)); 105 assert(LLVMTypeOf(index) == i32t); 106 107 if (src_type.length == 1) { 108 if (dst_type.length == 1) { 109 /* 110 * Trivial scalar -> scalar. 111 */ 112 113 res = vector; 114 } 115 else { 116 /* 117 * Broadcast scalar -> vector. 118 */ 119 120 res = lp_build_broadcast(gallivm, 121 lp_build_vec_type(gallivm, dst_type), 122 vector); 123 } 124 } 125 else { 126 if (dst_type.length > 1) { 127 /* 128 * shuffle - result can be of different length. 129 */ 130 131 LLVMValueRef shuffle; 132 shuffle = lp_build_broadcast(gallivm, 133 LLVMVectorType(i32t, dst_type.length), 134 index); 135 res = LLVMBuildShuffleVector(gallivm->builder, vector, 136 LLVMGetUndef(lp_build_vec_type(gallivm, src_type)), 137 shuffle, ""); 138 } 139 else { 140 /* 141 * Trivial extract scalar from vector. 142 */ 143 res = LLVMBuildExtractElement(gallivm->builder, vector, index, ""); 144 } 145 } 146 147 return res; 148 } 149 150 151 /** 152 * Swizzle one channel into other channels. 153 */ 154 LLVMValueRef 155 lp_build_swizzle_scalar_aos(struct lp_build_context *bld, 156 LLVMValueRef a, 157 unsigned channel, 158 unsigned num_channels) 159 { 160 LLVMBuilderRef builder = bld->gallivm->builder; 161 const struct lp_type type = bld->type; 162 const unsigned n = type.length; 163 unsigned i, j; 164 165 if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1) 166 return a; 167 168 assert(num_channels == 2 || num_channels == 4); 169 170 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing 171 * using shuffles here actually causes worst results. More investigation is 172 * needed. */ 173 if (LLVMIsConstant(a) || 174 type.width >= 16) { 175 /* 176 * Shuffle. 177 */ 178 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 179 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 180 181 for(j = 0; j < n; j += num_channels) 182 for(i = 0; i < num_channels; ++i) 183 shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0); 184 185 return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); 186 } 187 else if (num_channels == 2) { 188 /* 189 * Bit mask and shifts 190 * 191 * XY XY .... XY <= input 192 * 0Y 0Y .... 0Y 193 * YY YY .... YY 194 * YY YY .... YY <= output 195 */ 196 struct lp_type type2; 197 LLVMValueRef tmp = NULL; 198 int shift; 199 200 a = LLVMBuildAnd(builder, a, 201 lp_build_const_mask_aos(bld->gallivm, 202 type, 1 << channel, num_channels), ""); 203 204 type2 = type; 205 type2.floating = FALSE; 206 type2.width *= 2; 207 type2.length /= 2; 208 209 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), ""); 210 211 /* 212 * Vector element 0 is always channel X. 213 * 214 * 76 54 32 10 (array numbering) 215 * Little endian reg in: YX YX YX YX 216 * Little endian reg out: YY YY YY YY if shift right (shift == -1) 217 * XX XX XX XX if shift left (shift == 1) 218 * 219 * 01 23 45 67 (array numbering) 220 * Big endian reg in: XY XY XY XY 221 * Big endian reg out: YY YY YY YY if shift left (shift == 1) 222 * XX XX XX XX if shift right (shift == -1) 223 * 224 */ 225 #ifdef PIPE_ARCH_LITTLE_ENDIAN 226 shift = channel == 0 ? 1 : -1; 227 #else 228 shift = channel == 0 ? -1 : 1; 229 #endif 230 231 if (shift > 0) { 232 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), ""); 233 } else if (shift < 0) { 234 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), ""); 235 } 236 237 assert(tmp); 238 if (tmp) { 239 a = LLVMBuildOr(builder, a, tmp, ""); 240 } 241 242 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), ""); 243 } 244 else { 245 /* 246 * Bit mask and recursive shifts 247 * 248 * Little-endian registers: 249 * 250 * 7654 3210 251 * WZYX WZYX .... WZYX <= input 252 * 00Y0 00Y0 .... 00Y0 <= mask 253 * 00YY 00YY .... 00YY <= shift right 1 (shift amount -1) 254 * YYYY YYYY .... YYYY <= shift left 2 (shift amount 2) 255 * 256 * Big-endian registers: 257 * 258 * 0123 4567 259 * XYZW XYZW .... XYZW <= input 260 * 0Y00 0Y00 .... 0Y00 <= mask 261 * YY00 YY00 .... YY00 <= shift left 1 (shift amount 1) 262 * YYYY YYYY .... YYYY <= shift right 2 (shift amount -2) 263 * 264 * shifts[] gives little-endian shift amounts; we need to negate for big-endian. 265 */ 266 struct lp_type type4; 267 const int shifts[4][2] = { 268 { 1, 2}, 269 {-1, 2}, 270 { 1, -2}, 271 {-1, -2} 272 }; 273 unsigned i; 274 275 a = LLVMBuildAnd(builder, a, 276 lp_build_const_mask_aos(bld->gallivm, 277 type, 1 << channel, 4), ""); 278 279 /* 280 * Build a type where each element is an integer that cover the four 281 * channels. 282 */ 283 284 type4 = type; 285 type4.floating = FALSE; 286 type4.width *= 4; 287 type4.length /= 4; 288 289 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); 290 291 for(i = 0; i < 2; ++i) { 292 LLVMValueRef tmp = NULL; 293 int shift = shifts[channel][i]; 294 295 /* See endianness diagram above */ 296 #ifdef PIPE_ARCH_BIG_ENDIAN 297 shift = -shift; 298 #endif 299 300 if(shift > 0) 301 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); 302 if(shift < 0) 303 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); 304 305 assert(tmp); 306 if(tmp) 307 a = LLVMBuildOr(builder, a, tmp, ""); 308 } 309 310 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), ""); 311 } 312 } 313 314 315 /** 316 * Swizzle a vector consisting of an array of XYZW structs. 317 * 318 * This fills a vector of dst_len length with the swizzled channels from src. 319 * 320 * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in 321 * RGBA RGBA = BGR BGR BG 322 * 323 * @param swizzles the swizzle array 324 * @param num_swizzles the number of elements in swizzles 325 * @param dst_len the length of the result 326 */ 327 LLVMValueRef 328 lp_build_swizzle_aos_n(struct gallivm_state* gallivm, 329 LLVMValueRef src, 330 const unsigned char* swizzles, 331 unsigned num_swizzles, 332 unsigned dst_len) 333 { 334 LLVMBuilderRef builder = gallivm->builder; 335 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH]; 336 unsigned i; 337 338 assert(dst_len < LP_MAX_VECTOR_WIDTH); 339 340 for (i = 0; i < dst_len; ++i) { 341 int swizzle = swizzles[i % num_swizzles]; 342 343 if (swizzle == LP_BLD_SWIZZLE_DONTCARE) { 344 shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); 345 } else { 346 shuffles[i] = lp_build_const_int32(gallivm, swizzle); 347 } 348 } 349 350 return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), ""); 351 } 352 353 354 LLVMValueRef 355 lp_build_swizzle_aos(struct lp_build_context *bld, 356 LLVMValueRef a, 357 const unsigned char swizzles[4]) 358 { 359 LLVMBuilderRef builder = bld->gallivm->builder; 360 const struct lp_type type = bld->type; 361 const unsigned n = type.length; 362 unsigned i, j; 363 364 if (swizzles[0] == PIPE_SWIZZLE_X && 365 swizzles[1] == PIPE_SWIZZLE_Y && 366 swizzles[2] == PIPE_SWIZZLE_Z && 367 swizzles[3] == PIPE_SWIZZLE_W) { 368 return a; 369 } 370 371 if (swizzles[0] == swizzles[1] && 372 swizzles[1] == swizzles[2] && 373 swizzles[2] == swizzles[3]) { 374 switch (swizzles[0]) { 375 case PIPE_SWIZZLE_X: 376 case PIPE_SWIZZLE_Y: 377 case PIPE_SWIZZLE_Z: 378 case PIPE_SWIZZLE_W: 379 return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4); 380 case PIPE_SWIZZLE_0: 381 return bld->zero; 382 case PIPE_SWIZZLE_1: 383 return bld->one; 384 case LP_BLD_SWIZZLE_DONTCARE: 385 return bld->undef; 386 default: 387 assert(0); 388 return bld->undef; 389 } 390 } 391 392 if (LLVMIsConstant(a) || 393 type.width >= 16) { 394 /* 395 * Shuffle. 396 */ 397 LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type)); 398 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context); 399 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 400 LLVMValueRef aux[LP_MAX_VECTOR_LENGTH]; 401 402 memset(aux, 0, sizeof aux); 403 404 for(j = 0; j < n; j += 4) { 405 for(i = 0; i < 4; ++i) { 406 unsigned shuffle; 407 switch (swizzles[i]) { 408 default: 409 assert(0); 410 /* fall through */ 411 case PIPE_SWIZZLE_X: 412 case PIPE_SWIZZLE_Y: 413 case PIPE_SWIZZLE_Z: 414 case PIPE_SWIZZLE_W: 415 shuffle = j + swizzles[i]; 416 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 417 break; 418 case PIPE_SWIZZLE_0: 419 shuffle = type.length + 0; 420 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 421 if (!aux[0]) { 422 aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0); 423 } 424 break; 425 case PIPE_SWIZZLE_1: 426 shuffle = type.length + 1; 427 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0); 428 if (!aux[1]) { 429 aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0); 430 } 431 break; 432 case LP_BLD_SWIZZLE_DONTCARE: 433 shuffles[j + i] = LLVMGetUndef(i32t); 434 break; 435 } 436 } 437 } 438 439 for (i = 0; i < n; ++i) { 440 if (!aux[i]) { 441 aux[i] = undef; 442 } 443 } 444 445 return LLVMBuildShuffleVector(builder, a, 446 LLVMConstVector(aux, n), 447 LLVMConstVector(shuffles, n), ""); 448 } else { 449 /* 450 * Bit mask and shifts. 451 * 452 * For example, this will convert BGRA to RGBA by doing 453 * 454 * Little endian: 455 * rgba = (bgra & 0x00ff0000) >> 16 456 * | (bgra & 0xff00ff00) 457 * | (bgra & 0x000000ff) << 16 458 * 459 * Big endian:A 460 * rgba = (bgra & 0x0000ff00) << 16 461 * | (bgra & 0x00ff00ff) 462 * | (bgra & 0xff000000) >> 16 463 * 464 * This is necessary not only for faster cause, but because X86 backend 465 * will refuse shuffles of <4 x i8> vectors 466 */ 467 LLVMValueRef res; 468 struct lp_type type4; 469 unsigned cond = 0; 470 int chan; 471 int shift; 472 473 /* 474 * Start with a mixture of 1 and 0. 475 */ 476 for (chan = 0; chan < 4; ++chan) { 477 if (swizzles[chan] == PIPE_SWIZZLE_1) { 478 cond |= 1 << chan; 479 } 480 } 481 res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4); 482 483 /* 484 * Build a type where each element is an integer that cover the four 485 * channels. 486 */ 487 type4 = type; 488 type4.floating = FALSE; 489 type4.width *= 4; 490 type4.length /= 4; 491 492 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), ""); 493 res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), ""); 494 495 /* 496 * Mask and shift the channels, trying to group as many channels in the 497 * same shift as possible. The shift amount is positive for shifts left 498 * and negative for shifts right. 499 */ 500 for (shift = -3; shift <= 3; ++shift) { 501 uint64_t mask = 0; 502 503 assert(type4.width <= sizeof(mask)*8); 504 505 /* 506 * Vector element numbers follow the XYZW order, so 0 is always X, etc. 507 * After widening 4 times we have: 508 * 509 * 3210 510 * Little-endian register layout: WZYX 511 * 512 * 0123 513 * Big-endian register layout: XYZW 514 * 515 * For little-endian, higher-numbered channels are obtained by a shift right 516 * (negative shift amount) and lower-numbered channels by a shift left 517 * (positive shift amount). The opposite is true for big-endian. 518 */ 519 for (chan = 0; chan < 4; ++chan) { 520 if (swizzles[chan] < 4) { 521 /* We need to move channel swizzles[chan] into channel chan */ 522 #ifdef PIPE_ARCH_LITTLE_ENDIAN 523 if (swizzles[chan] - chan == -shift) { 524 mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width); 525 } 526 #else 527 if (swizzles[chan] - chan == shift) { 528 mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width); 529 } 530 #endif 531 } 532 } 533 534 if (mask) { 535 LLVMValueRef masked; 536 LLVMValueRef shifted; 537 if (0) 538 debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask); 539 540 masked = LLVMBuildAnd(builder, a, 541 lp_build_const_int_vec(bld->gallivm, type4, mask), ""); 542 if (shift > 0) { 543 shifted = LLVMBuildShl(builder, masked, 544 lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), ""); 545 } else if (shift < 0) { 546 shifted = LLVMBuildLShr(builder, masked, 547 lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), ""); 548 } else { 549 shifted = masked; 550 } 551 552 res = LLVMBuildOr(builder, res, shifted, ""); 553 } 554 } 555 556 return LLVMBuildBitCast(builder, res, 557 lp_build_vec_type(bld->gallivm, type), ""); 558 } 559 } 560 561 562 /** 563 * Extended swizzle of a single channel of a SoA vector. 564 * 565 * @param bld building context 566 * @param unswizzled array with the 4 unswizzled values 567 * @param swizzle one of the PIPE_SWIZZLE_* 568 * 569 * @return the swizzled value. 570 */ 571 LLVMValueRef 572 lp_build_swizzle_soa_channel(struct lp_build_context *bld, 573 const LLVMValueRef *unswizzled, 574 unsigned swizzle) 575 { 576 switch (swizzle) { 577 case PIPE_SWIZZLE_X: 578 case PIPE_SWIZZLE_Y: 579 case PIPE_SWIZZLE_Z: 580 case PIPE_SWIZZLE_W: 581 return unswizzled[swizzle]; 582 case PIPE_SWIZZLE_0: 583 return bld->zero; 584 case PIPE_SWIZZLE_1: 585 return bld->one; 586 default: 587 assert(0); 588 return bld->undef; 589 } 590 } 591 592 593 /** 594 * Extended swizzle of a SoA vector. 595 * 596 * @param bld building context 597 * @param unswizzled array with the 4 unswizzled values 598 * @param swizzles array of PIPE_SWIZZLE_* 599 * @param swizzled output swizzled values 600 */ 601 void 602 lp_build_swizzle_soa(struct lp_build_context *bld, 603 const LLVMValueRef *unswizzled, 604 const unsigned char swizzles[4], 605 LLVMValueRef *swizzled) 606 { 607 unsigned chan; 608 609 for (chan = 0; chan < 4; ++chan) { 610 swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled, 611 swizzles[chan]); 612 } 613 } 614 615 616 /** 617 * Do an extended swizzle of a SoA vector inplace. 618 * 619 * @param bld building context 620 * @param values intput/output array with the 4 values 621 * @param swizzles array of PIPE_SWIZZLE_* 622 */ 623 void 624 lp_build_swizzle_soa_inplace(struct lp_build_context *bld, 625 LLVMValueRef *values, 626 const unsigned char swizzles[4]) 627 { 628 LLVMValueRef unswizzled[4]; 629 unsigned chan; 630 631 for (chan = 0; chan < 4; ++chan) { 632 unswizzled[chan] = values[chan]; 633 } 634 635 lp_build_swizzle_soa(bld, unswizzled, swizzles, values); 636 } 637 638 639 /** 640 * Transpose from AOS <-> SOA 641 * 642 * @param single_type_lp type of pixels 643 * @param src the 4 * n pixel input 644 * @param dst the 4 * n pixel output 645 */ 646 void 647 lp_build_transpose_aos(struct gallivm_state *gallivm, 648 struct lp_type single_type_lp, 649 const LLVMValueRef src[4], 650 LLVMValueRef dst[4]) 651 { 652 struct lp_type double_type_lp = single_type_lp; 653 LLVMTypeRef single_type; 654 LLVMTypeRef double_type; 655 LLVMValueRef t0, t1, t2, t3; 656 657 double_type_lp.length >>= 1; 658 double_type_lp.width <<= 1; 659 660 double_type = lp_build_vec_type(gallivm, double_type_lp); 661 single_type = lp_build_vec_type(gallivm, single_type_lp); 662 663 /* Interleave x, y, z, w -> xy and zw */ 664 t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0); 665 t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0); 666 t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1); 667 t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1); 668 669 /* Cast to double width type for second interleave */ 670 t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0"); 671 t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1"); 672 t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2"); 673 t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3"); 674 675 /* Interleave xy, zw -> xyzw */ 676 dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0); 677 dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1); 678 dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0); 679 dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1); 680 681 /* Cast back to original single width type */ 682 dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0"); 683 dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1"); 684 dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2"); 685 dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3"); 686 } 687 688 689 /** 690 * Transpose from AOS <-> SOA for num_srcs 691 */ 692 void 693 lp_build_transpose_aos_n(struct gallivm_state *gallivm, 694 struct lp_type type, 695 const LLVMValueRef* src, 696 unsigned num_srcs, 697 LLVMValueRef* dst) 698 { 699 switch (num_srcs) { 700 case 1: 701 dst[0] = src[0]; 702 break; 703 704 case 2: 705 { 706 /* Note: we must use a temporary incase src == dst */ 707 LLVMValueRef lo, hi; 708 709 lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0); 710 hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1); 711 712 dst[0] = lo; 713 dst[1] = hi; 714 break; 715 } 716 717 case 4: 718 lp_build_transpose_aos(gallivm, type, src, dst); 719 break; 720 721 default: 722 assert(0); 723 } 724 } 725 726 727 /** 728 * Pack n-th element of aos values, 729 * pad out to destination size. 730 * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _ 731 */ 732 LLVMValueRef 733 lp_build_pack_aos_scalars(struct gallivm_state *gallivm, 734 struct lp_type src_type, 735 struct lp_type dst_type, 736 const LLVMValueRef src, 737 unsigned channel) 738 { 739 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 740 LLVMValueRef undef = LLVMGetUndef(i32t); 741 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 742 unsigned num_src = src_type.length / 4; 743 unsigned num_dst = dst_type.length; 744 unsigned i; 745 746 assert(num_src <= num_dst); 747 748 for (i = 0; i < num_src; i++) { 749 shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0); 750 } 751 for (i = num_src; i < num_dst; i++) { 752 shuffles[i] = undef; 753 } 754 755 if (num_dst == 1) { 756 return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], ""); 757 } 758 else { 759 return LLVMBuildShuffleVector(gallivm->builder, src, src, 760 LLVMConstVector(shuffles, num_dst), ""); 761 } 762 } 763 764 765 /** 766 * Unpack and broadcast packed aos values consisting of only the 767 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2 768 */ 769 LLVMValueRef 770 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm, 771 struct lp_type src_type, 772 struct lp_type dst_type, 773 const LLVMValueRef src) 774 { 775 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 776 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 777 unsigned num_dst = dst_type.length; 778 unsigned num_src = dst_type.length / 4; 779 unsigned i; 780 781 assert(num_dst / 4 <= src_type.length); 782 783 for (i = 0; i < num_src; i++) { 784 shuffles[i*4] = LLVMConstInt(i32t, i, 0); 785 shuffles[i*4+1] = LLVMConstInt(i32t, i, 0); 786 shuffles[i*4+2] = LLVMConstInt(i32t, i, 0); 787 shuffles[i*4+3] = LLVMConstInt(i32t, i, 0); 788 } 789 790 if (num_src == 1) { 791 return lp_build_extract_broadcast(gallivm, src_type, dst_type, 792 src, shuffles[0]); 793 } 794 else { 795 return LLVMBuildShuffleVector(gallivm->builder, src, src, 796 LLVMConstVector(shuffles, num_dst), ""); 797 } 798 } 799 800