1 /************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 * 26 **************************************************************************/ 27 28 29 #include "util/u_debug.h" 30 #include "util/u_cpu_detect.h" 31 #include "util/u_math.h" 32 #include "lp_bld_debug.h" 33 #include "lp_bld_const.h" 34 #include "lp_bld_format.h" 35 #include "lp_bld_gather.h" 36 #include "lp_bld_swizzle.h" 37 #include "lp_bld_type.h" 38 #include "lp_bld_init.h" 39 #include "lp_bld_intr.h" 40 #include "lp_bld_pack.h" 41 42 43 /** 44 * Get the pointer to one element from scatter positions in memory. 45 * 46 * @sa lp_build_gather() 47 */ 48 LLVMValueRef 49 lp_build_gather_elem_ptr(struct gallivm_state *gallivm, 50 unsigned length, 51 LLVMValueRef base_ptr, 52 LLVMValueRef offsets, 53 unsigned i) 54 { 55 LLVMValueRef offset; 56 LLVMValueRef ptr; 57 58 assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); 59 60 if (length == 1) { 61 assert(i == 0); 62 offset = offsets; 63 } else { 64 LLVMValueRef index = lp_build_const_int32(gallivm, i); 65 offset = LLVMBuildExtractElement(gallivm->builder, offsets, index, ""); 66 } 67 68 ptr = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, ""); 69 70 return ptr; 71 } 72 73 74 /** 75 * Gather one element from scatter positions in memory. 76 * 77 * @sa lp_build_gather() 78 */ 79 LLVMValueRef 80 lp_build_gather_elem(struct gallivm_state *gallivm, 81 unsigned length, 82 unsigned src_width, 83 unsigned dst_width, 84 boolean aligned, 85 LLVMValueRef base_ptr, 86 LLVMValueRef offsets, 87 unsigned i, 88 boolean vector_justify) 89 { 90 LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width); 91 LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); 92 LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width); 93 LLVMValueRef ptr; 94 LLVMValueRef res; 95 96 assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); 97 98 ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i); 99 ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, ""); 100 res = LLVMBuildLoad(gallivm->builder, ptr, ""); 101 102 /* XXX 103 * On some archs we probably really want to avoid having to deal 104 * with alignments lower than 4 bytes (if fetch size is a power of 105 * two >= 32). On x86 it doesn't matter, however. 106 * We should be able to guarantee full alignment for any kind of texture 107 * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch 108 * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends 109 * but I don't think that's quite what we wanted). 110 * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT 111 * looks like a good fit, but it seems this cap bit (and OpenGL) aren't 112 * enforcing what we want (which is what d3d10 does, the offset needs to 113 * be aligned to element size, but GL has bytes regardless of element 114 * size which would only leave us with minimum alignment restriction of 16 115 * which doesn't make much sense if the type isn't 4x32bit). Due to 116 * translation of offsets to first_elem in sampler_views it actually seems 117 * gallium could not do anything else except 16 no matter what... 118 */ 119 if (!aligned) { 120 LLVMSetAlignment(res, 1); 121 } else if (!util_is_power_of_two(src_width)) { 122 /* 123 * Full alignment is impossible, assume the caller really meant 124 * the individual elements were aligned (e.g. 3x32bit format). 125 * And yes the generated code may otherwise crash, llvm will 126 * really assume 128bit alignment with a 96bit fetch (I suppose 127 * that makes sense as it can just assume the upper 32bit to be 128 * whatever). 129 * Maybe the caller should be able to explicitly set this, but 130 * this should cover all the 3-channel formats. 131 */ 132 if (((src_width / 24) * 24 == src_width) && 133 util_is_power_of_two(src_width / 24)) { 134 LLVMSetAlignment(res, src_width / 24); 135 } else { 136 LLVMSetAlignment(res, 1); 137 } 138 } 139 140 assert(src_width <= dst_width); 141 if (src_width < dst_width) { 142 res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, ""); 143 if (vector_justify) { 144 #ifdef PIPE_ARCH_BIG_ENDIAN 145 res = LLVMBuildShl(gallivm->builder, res, 146 LLVMConstInt(dst_elem_type, dst_width - src_width, 0), ""); 147 #endif 148 } 149 } 150 151 return res; 152 } 153 154 155 /** 156 * Gather one element from scatter positions in memory. 157 * Nearly the same as above, however the individual elements 158 * may be vectors themselves, and fetches may be float type. 159 * Can also do pad vector instead of ZExt. 160 * 161 * @sa lp_build_gather() 162 */ 163 static LLVMValueRef 164 lp_build_gather_elem_vec(struct gallivm_state *gallivm, 165 unsigned length, 166 unsigned src_width, 167 LLVMTypeRef src_type, 168 struct lp_type dst_type, 169 boolean aligned, 170 LLVMValueRef base_ptr, 171 LLVMValueRef offsets, 172 unsigned i, 173 boolean vector_justify) 174 { 175 LLVMValueRef ptr, res; 176 LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); 177 assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); 178 179 ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i); 180 ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, ""); 181 res = LLVMBuildLoad(gallivm->builder, ptr, ""); 182 183 /* XXX 184 * On some archs we probably really want to avoid having to deal 185 * with alignments lower than 4 bytes (if fetch size is a power of 186 * two >= 32). On x86 it doesn't matter, however. 187 * We should be able to guarantee full alignment for any kind of texture 188 * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch 189 * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends 190 * but I don't think that's quite what we wanted). 191 * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT 192 * looks like a good fit, but it seems this cap bit (and OpenGL) aren't 193 * enforcing what we want (which is what d3d10 does, the offset needs to 194 * be aligned to element size, but GL has bytes regardless of element 195 * size which would only leave us with minimum alignment restriction of 16 196 * which doesn't make much sense if the type isn't 4x32bit). Due to 197 * translation of offsets to first_elem in sampler_views it actually seems 198 * gallium could not do anything else except 16 no matter what... 199 */ 200 if (!aligned) { 201 LLVMSetAlignment(res, 1); 202 } else if (!util_is_power_of_two(src_width)) { 203 /* 204 * Full alignment is impossible, assume the caller really meant 205 * the individual elements were aligned (e.g. 3x32bit format). 206 * And yes the generated code may otherwise crash, llvm will 207 * really assume 128bit alignment with a 96bit fetch (I suppose 208 * that makes sense as it can just assume the upper 32bit to be 209 * whatever). 210 * Maybe the caller should be able to explicitly set this, but 211 * this should cover all the 3-channel formats. 212 */ 213 if (((src_width / 24) * 24 == src_width) && 214 util_is_power_of_two(src_width / 24)) { 215 LLVMSetAlignment(res, src_width / 24); 216 } else { 217 LLVMSetAlignment(res, 1); 218 } 219 } 220 221 assert(src_width <= dst_type.width * dst_type.length); 222 if (src_width < dst_type.width * dst_type.length) { 223 if (dst_type.length > 1) { 224 res = lp_build_pad_vector(gallivm, res, dst_type.length); 225 /* 226 * vector_justify hopefully a non-issue since we only deal 227 * with src_width >= 32 here? 228 */ 229 } else { 230 LLVMTypeRef dst_elem_type = lp_build_vec_type(gallivm, dst_type); 231 232 /* 233 * Only valid if src_ptr_type is int type... 234 */ 235 res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, ""); 236 237 if (vector_justify) { 238 #ifdef PIPE_ARCH_BIG_ENDIAN 239 res = LLVMBuildShl(gallivm->builder, res, 240 LLVMConstInt(dst_elem_type, 241 dst_type.width - src_width, 0), ""); 242 #endif 243 } 244 } 245 } 246 return res; 247 } 248 249 250 251 252 static LLVMValueRef 253 lp_build_gather_avx2(struct gallivm_state *gallivm, 254 unsigned length, 255 unsigned src_width, 256 struct lp_type dst_type, 257 LLVMValueRef base_ptr, 258 LLVMValueRef offsets) 259 { 260 LLVMBuilderRef builder = gallivm->builder; 261 LLVMTypeRef src_type, src_vec_type; 262 LLVMValueRef res; 263 struct lp_type res_type = dst_type; 264 res_type.length *= length; 265 266 if (dst_type.floating) { 267 src_type = src_width == 64 ? LLVMDoubleTypeInContext(gallivm->context) : 268 LLVMFloatTypeInContext(gallivm->context); 269 } else { 270 src_type = LLVMIntTypeInContext(gallivm->context, src_width); 271 } 272 src_vec_type = LLVMVectorType(src_type, length); 273 274 /* XXX should allow hw scaling (can handle i8, i16, i32, i64 for x86) */ 275 assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)); 276 277 if (0) { 278 /* 279 * XXX: This will cause LLVM pre 3.7 to hang; it works on LLVM 3.8 but 280 * will not use the AVX2 gather instrinsics (even with llvm 4.0), at 281 * least with Haswell. See 282 * http://lists.llvm.org/pipermail/llvm-dev/2016-January/094448.html 283 * And the generated code doing the emulation is quite a bit worse 284 * than what we get by doing it ourselves too. 285 */ 286 LLVMTypeRef i32_type = LLVMIntTypeInContext(gallivm->context, 32); 287 LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length); 288 LLVMTypeRef i1_type = LLVMIntTypeInContext(gallivm->context, 1); 289 LLVMTypeRef i1_vec_type = LLVMVectorType(i1_type, length); 290 LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0); 291 LLVMValueRef src_ptr; 292 293 base_ptr = LLVMBuildBitCast(builder, base_ptr, src_ptr_type, ""); 294 295 /* Rescale offsets from bytes to elements */ 296 LLVMValueRef scale = LLVMConstInt(i32_type, src_width/8, 0); 297 scale = lp_build_broadcast(gallivm, i32_vec_type, scale); 298 assert(LLVMTypeOf(offsets) == i32_vec_type); 299 offsets = LLVMBuildSDiv(builder, offsets, scale, ""); 300 301 src_ptr = LLVMBuildGEP(builder, base_ptr, &offsets, 1, "vector-gep"); 302 303 char intrinsic[64]; 304 util_snprintf(intrinsic, sizeof intrinsic, "llvm.masked.gather.v%u%s%u", 305 length, dst_type.floating ? "f" : "i", src_width); 306 LLVMValueRef alignment = LLVMConstInt(i32_type, src_width/8, 0); 307 LLVMValueRef mask = LLVMConstAllOnes(i1_vec_type); 308 LLVMValueRef passthru = LLVMGetUndef(src_vec_type); 309 310 LLVMValueRef args[] = { src_ptr, alignment, mask, passthru }; 311 312 res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 4, 0); 313 } else { 314 LLVMTypeRef i8_type = LLVMIntTypeInContext(gallivm->context, 8); 315 const char *intrinsic = NULL; 316 unsigned l_idx = 0; 317 318 assert(src_width == 32 || src_width == 64); 319 if (src_width == 32) { 320 assert(length == 4 || length == 8); 321 } else { 322 assert(length == 2 || length == 4); 323 } 324 325 static const char *intrinsics[2][2][2] = { 326 327 {{"llvm.x86.avx2.gather.d.d", 328 "llvm.x86.avx2.gather.d.d.256"}, 329 {"llvm.x86.avx2.gather.d.q", 330 "llvm.x86.avx2.gather.d.q.256"}}, 331 332 {{"llvm.x86.avx2.gather.d.ps", 333 "llvm.x86.avx2.gather.d.ps.256"}, 334 {"llvm.x86.avx2.gather.d.pd", 335 "llvm.x86.avx2.gather.d.pd.256"}}, 336 }; 337 338 if ((src_width == 32 && length == 8) || 339 (src_width == 64 && length == 4)) { 340 l_idx = 1; 341 } 342 intrinsic = intrinsics[dst_type.floating][src_width == 64][l_idx]; 343 344 LLVMValueRef passthru = LLVMGetUndef(src_vec_type); 345 LLVMValueRef mask = LLVMConstAllOnes(src_vec_type); 346 mask = LLVMConstBitCast(mask, src_vec_type); 347 LLVMValueRef scale = LLVMConstInt(i8_type, 1, 0); 348 349 LLVMValueRef args[] = { passthru, base_ptr, offsets, mask, scale }; 350 351 res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 5, 0); 352 } 353 res = LLVMBuildBitCast(builder, res, lp_build_vec_type(gallivm, res_type), ""); 354 355 return res; 356 } 357 358 359 /** 360 * Gather elements from scatter positions in memory into a single vector. 361 * Use for fetching texels from a texture. 362 * For SSE, typical values are length=4, src_width=32, dst_width=32. 363 * 364 * When src_width < dst_width, the return value can be justified in 365 * one of two ways: 366 * "integer justification" is used when the caller treats the destination 367 * as a packed integer bitmask, as described by the channels' "shift" and 368 * "width" fields; 369 * "vector justification" is used when the caller casts the destination 370 * to a vector and needs channel X to be in vector element 0. 371 * 372 * @param length length of the offsets 373 * @param src_width src element width in bits 374 * @param dst_type result element type (src will be expanded to fit, 375 * but truncation is not allowed) 376 * (this may be a vector, must be pot sized) 377 * @param aligned whether the data is guaranteed to be aligned (to src_width) 378 * @param base_ptr base pointer, needs to be a i8 pointer type. 379 * @param offsets vector with offsets 380 * @param vector_justify select vector rather than integer justification 381 */ 382 LLVMValueRef 383 lp_build_gather(struct gallivm_state *gallivm, 384 unsigned length, 385 unsigned src_width, 386 struct lp_type dst_type, 387 boolean aligned, 388 LLVMValueRef base_ptr, 389 LLVMValueRef offsets, 390 boolean vector_justify) 391 { 392 LLVMValueRef res; 393 boolean need_expansion = src_width < dst_type.width * dst_type.length; 394 boolean vec_fetch; 395 struct lp_type fetch_type, fetch_dst_type; 396 LLVMTypeRef src_type; 397 398 assert(src_width <= dst_type.width * dst_type.length); 399 400 /* 401 * This is quite a mess... 402 * Figure out if the fetch should be done as: 403 * a) scalar or vector 404 * b) float or int 405 * 406 * As an example, for a 96bit fetch expanded into 4x32bit, it is better 407 * to use (3x32bit) vector type (then pad the vector). Otherwise, the 408 * zext will cause extra instructions. 409 * However, the same isn't true for 3x16bit (the codegen for that is 410 * completely worthless on x86 simd, and for 3x8bit is is way worse 411 * still, don't try that... (To get really good code out of llvm for 412 * these cases, the only way is to decompose the fetches manually 413 * into 1x32bit/1x16bit, or 1x16/1x8bit respectively, although the latter 414 * case requires sse41, otherwise simple scalar zext is way better. 415 * But probably not important enough, so don't bother.) 416 * Also, we try to honor the floating bit of destination (but isn't 417 * possible if caller asks for instance for 2x32bit dst_type with 418 * 48bit fetch - the idea would be to use 3x16bit fetch, pad and 419 * cast to 2x32f type, so the fetch is always int and on top of that 420 * we avoid the vec pad and use scalar zext due the above mentioned 421 * issue). 422 * Note this is optimized for x86 sse2 and up backend. Could be tweaked 423 * for other archs if necessary... 424 */ 425 if (((src_width % 32) == 0) && ((src_width % dst_type.width) == 0) && 426 (dst_type.length > 1)) { 427 /* use vector fetch (if dst_type is vector) */ 428 vec_fetch = TRUE; 429 if (dst_type.floating) { 430 fetch_type = lp_type_float_vec(dst_type.width, src_width); 431 } else { 432 fetch_type = lp_type_int_vec(dst_type.width, src_width); 433 } 434 /* intentionally not using lp_build_vec_type here */ 435 src_type = LLVMVectorType(lp_build_elem_type(gallivm, fetch_type), 436 fetch_type.length); 437 fetch_dst_type = fetch_type; 438 fetch_dst_type.length = dst_type.length; 439 } else { 440 /* use scalar fetch */ 441 vec_fetch = FALSE; 442 if (dst_type.floating && ((src_width == 32) || (src_width == 64))) { 443 fetch_type = lp_type_float(src_width); 444 } else { 445 fetch_type = lp_type_int(src_width); 446 } 447 src_type = lp_build_vec_type(gallivm, fetch_type); 448 fetch_dst_type = fetch_type; 449 fetch_dst_type.width = dst_type.width * dst_type.length; 450 } 451 452 if (length == 1) { 453 /* Scalar */ 454 res = lp_build_gather_elem_vec(gallivm, length, 455 src_width, src_type, fetch_dst_type, 456 aligned, base_ptr, offsets, 0, 457 vector_justify); 458 return LLVMBuildBitCast(gallivm->builder, res, 459 lp_build_vec_type(gallivm, dst_type), ""); 460 /* 461 * Excluding expansion from these paths because if you need it for 462 * 32bit/64bit fetches you're doing it wrong (this is gather, not 463 * conversion) and it would be awkward for floats. 464 */ 465 } else if (util_cpu_caps.has_avx2 && !need_expansion && 466 src_width == 32 && (length == 4 || length == 8)) { 467 return lp_build_gather_avx2(gallivm, length, src_width, dst_type, 468 base_ptr, offsets); 469 /* 470 * This looks bad on paper wrt throughtput/latency on Haswell. 471 * Even on Broadwell it doesn't look stellar. 472 * Albeit no measurements were done (but tested to work). 473 * Should definitely enable on Skylake. 474 * (In general, should be more of a win if the fetch is 256bit wide - 475 * this is true for the 32bit case above too.) 476 */ 477 } else if (0 && util_cpu_caps.has_avx2 && !need_expansion && 478 src_width == 64 && (length == 2 || length == 4)) { 479 return lp_build_gather_avx2(gallivm, length, src_width, dst_type, 480 base_ptr, offsets); 481 } else { 482 /* Vector */ 483 484 LLVMValueRef elems[LP_MAX_VECTOR_WIDTH / 8]; 485 unsigned i; 486 boolean vec_zext = FALSE; 487 struct lp_type res_type, gather_res_type; 488 LLVMTypeRef res_t, gather_res_t; 489 490 res_type = fetch_dst_type; 491 res_type.length *= length; 492 gather_res_type = res_type; 493 494 if (src_width == 16 && dst_type.width == 32 && dst_type.length == 1) { 495 /* 496 * Note that llvm is never able to optimize zext/insert combos 497 * directly (i.e. zero the simd reg, then place the elements into 498 * the appropriate place directly). (I think this has to do with 499 * scalar/vector transition.) And scalar 16->32bit zext simd loads 500 * aren't possible (instead loading to scalar reg first). 501 * No idea about other archs... 502 * We could do this manually, but instead we just use a vector 503 * zext, which is simple enough (and, in fact, llvm might optimize 504 * this away). 505 * (We're not trying that with other bit widths as that might not be 506 * easier, in particular with 8 bit values at least with only sse2.) 507 */ 508 assert(vec_fetch == FALSE); 509 gather_res_type.width /= 2; 510 fetch_dst_type = fetch_type; 511 src_type = lp_build_vec_type(gallivm, fetch_type); 512 vec_zext = TRUE; 513 } 514 res_t = lp_build_vec_type(gallivm, res_type); 515 gather_res_t = lp_build_vec_type(gallivm, gather_res_type); 516 res = LLVMGetUndef(gather_res_t); 517 for (i = 0; i < length; ++i) { 518 LLVMValueRef index = lp_build_const_int32(gallivm, i); 519 elems[i] = lp_build_gather_elem_vec(gallivm, length, 520 src_width, src_type, fetch_dst_type, 521 aligned, base_ptr, offsets, i, 522 vector_justify); 523 if (!vec_fetch) { 524 res = LLVMBuildInsertElement(gallivm->builder, res, elems[i], index, ""); 525 } 526 } 527 if (vec_zext) { 528 res = LLVMBuildZExt(gallivm->builder, res, res_t, ""); 529 if (vector_justify) { 530 #ifdef PIPE_ARCH_BIG_ENDIAN 531 unsigned sv = dst_type.width - src_width; 532 res = LLVMBuildShl(gallivm->builder, res, 533 lp_build_const_int_vec(gallivm, res_type, sv), ""); 534 #endif 535 } 536 } 537 if (vec_fetch) { 538 /* 539 * Do bitcast now otherwise llvm might get some funny ideas wrt 540 * float/int types... 541 */ 542 for (i = 0; i < length; i++) { 543 elems[i] = LLVMBuildBitCast(gallivm->builder, elems[i], 544 lp_build_vec_type(gallivm, dst_type), ""); 545 } 546 res = lp_build_concat(gallivm, elems, dst_type, length); 547 } else { 548 struct lp_type really_final_type = dst_type; 549 assert(res_type.length * res_type.width == 550 dst_type.length * dst_type.width * length); 551 really_final_type.length *= length; 552 res = LLVMBuildBitCast(gallivm->builder, res, 553 lp_build_vec_type(gallivm, really_final_type), ""); 554 } 555 } 556 557 return res; 558 } 559 560 LLVMValueRef 561 lp_build_gather_values(struct gallivm_state * gallivm, 562 LLVMValueRef * values, 563 unsigned value_count) 564 { 565 LLVMTypeRef vec_type = LLVMVectorType(LLVMTypeOf(values[0]), value_count); 566 LLVMBuilderRef builder = gallivm->builder; 567 LLVMValueRef vec = LLVMGetUndef(vec_type); 568 unsigned i; 569 570 for (i = 0; i < value_count; i++) { 571 LLVMValueRef index = lp_build_const_int32(gallivm, i); 572 vec = LLVMBuildInsertElement(builder, vec, values[i], index, ""); 573 } 574 return vec; 575 } 576