1 /************************************************************************** 2 * 3 * Copyright 2010 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * @file 30 * Texture sampling -- AoS. 
31 * 32 * @author Jose Fonseca <jfonseca (at) vmware.com> 33 * @author Brian Paul <brianp (at) vmware.com> 34 */ 35 36 #include "pipe/p_defines.h" 37 #include "pipe/p_state.h" 38 #include "util/u_debug.h" 39 #include "util/u_dump.h" 40 #include "util/u_memory.h" 41 #include "util/u_math.h" 42 #include "util/u_format.h" 43 #include "util/u_cpu_detect.h" 44 #include "lp_bld_debug.h" 45 #include "lp_bld_type.h" 46 #include "lp_bld_const.h" 47 #include "lp_bld_conv.h" 48 #include "lp_bld_arit.h" 49 #include "lp_bld_bitarit.h" 50 #include "lp_bld_logic.h" 51 #include "lp_bld_swizzle.h" 52 #include "lp_bld_pack.h" 53 #include "lp_bld_flow.h" 54 #include "lp_bld_gather.h" 55 #include "lp_bld_format.h" 56 #include "lp_bld_init.h" 57 #include "lp_bld_sample.h" 58 #include "lp_bld_sample_aos.h" 59 #include "lp_bld_quad.h" 60 61 62 /** 63 * Build LLVM code for texture coord wrapping, for nearest filtering, 64 * for scaled integer texcoords. 65 * \param block_length is the length of the pixel block along the 66 * coordinate axis 67 * \param coord the incoming texcoord (s,t,r or q) scaled to the texture size 68 * \param length the texture size along one dimension 69 * \param stride pixel stride along the coordinate axis (in bytes) 70 * \param is_pot if TRUE, length is a power of two 71 * \param wrap_mode one of PIPE_TEX_WRAP_x 72 * \param out_offset byte offset for the wrapped coordinate 73 * \param out_i resulting sub-block pixel coordinate for coord0 74 */ 75 static void 76 lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, 77 unsigned block_length, 78 LLVMValueRef coord, 79 LLVMValueRef coord_f, 80 LLVMValueRef length, 81 LLVMValueRef stride, 82 boolean is_pot, 83 unsigned wrap_mode, 84 LLVMValueRef *out_offset, 85 LLVMValueRef *out_i) 86 { 87 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 88 LLVMBuilderRef builder = bld->gallivm->builder; 89 LLVMValueRef length_minus_one; 90 91 length_minus_one = lp_build_sub(int_coord_bld, length, 
int_coord_bld->one); 92 93 switch(wrap_mode) { 94 case PIPE_TEX_WRAP_REPEAT: 95 if(is_pot) 96 coord = LLVMBuildAnd(builder, coord, length_minus_one, ""); 97 else { 98 struct lp_build_context *coord_bld = &bld->coord_bld; 99 LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); 100 coord = lp_build_fract_safe(coord_bld, coord_f); 101 coord = lp_build_mul(coord_bld, coord, length_f); 102 coord = lp_build_itrunc(coord_bld, coord); 103 } 104 break; 105 106 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 107 coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); 108 coord = lp_build_min(int_coord_bld, coord, length_minus_one); 109 break; 110 111 case PIPE_TEX_WRAP_CLAMP: 112 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 113 case PIPE_TEX_WRAP_MIRROR_REPEAT: 114 case PIPE_TEX_WRAP_MIRROR_CLAMP: 115 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 116 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 117 default: 118 assert(0); 119 } 120 121 lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, 122 out_offset, out_i); 123 } 124 125 126 /** 127 * Build LLVM code for texture coord wrapping, for nearest filtering, 128 * for float texcoords. 
129 * \param coord the incoming texcoord (s,t,r or q) 130 * \param length the texture size along one dimension 131 * \param is_pot if TRUE, length is a power of two 132 * \param wrap_mode one of PIPE_TEX_WRAP_x 133 * \param icoord the texcoord after wrapping, as int 134 */ 135 static void 136 lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, 137 LLVMValueRef coord, 138 LLVMValueRef length, 139 boolean is_pot, 140 unsigned wrap_mode, 141 LLVMValueRef *icoord) 142 { 143 struct lp_build_context *coord_bld = &bld->coord_bld; 144 LLVMValueRef length_minus_one; 145 146 switch(wrap_mode) { 147 case PIPE_TEX_WRAP_REPEAT: 148 /* take fraction, unnormalize */ 149 coord = lp_build_fract_safe(coord_bld, coord); 150 coord = lp_build_mul(coord_bld, coord, length); 151 *icoord = lp_build_itrunc(coord_bld, coord); 152 break; 153 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 154 length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); 155 if (bld->static_state->normalized_coords) { 156 /* scale coord to length */ 157 coord = lp_build_mul(coord_bld, coord, length); 158 } 159 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, 160 length_minus_one); 161 *icoord = lp_build_itrunc(coord_bld, coord); 162 break; 163 164 case PIPE_TEX_WRAP_CLAMP: 165 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 166 case PIPE_TEX_WRAP_MIRROR_REPEAT: 167 case PIPE_TEX_WRAP_MIRROR_CLAMP: 168 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 169 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 170 default: 171 assert(0); 172 } 173 } 174 175 176 /** 177 * Build LLVM code for texture coord wrapping, for linear filtering, 178 * for scaled integer texcoords. 
179 * \param block_length is the length of the pixel block along the 180 * coordinate axis 181 * \param coord0 the incoming texcoord (s,t,r or q) scaled to the texture size 182 * \param length the texture size along one dimension 183 * \param stride pixel stride along the coordinate axis (in bytes) 184 * \param is_pot if TRUE, length is a power of two 185 * \param wrap_mode one of PIPE_TEX_WRAP_x 186 * \param offset0 resulting relative offset for coord0 187 * \param offset1 resulting relative offset for coord0 + 1 188 * \param i0 resulting sub-block pixel coordinate for coord0 189 * \param i1 resulting sub-block pixel coordinate for coord0 + 1 190 */ 191 static void 192 lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld, 193 unsigned block_length, 194 LLVMValueRef coord0, 195 LLVMValueRef *weight_i, 196 LLVMValueRef coord_f, 197 LLVMValueRef length, 198 LLVMValueRef stride, 199 boolean is_pot, 200 unsigned wrap_mode, 201 LLVMValueRef *offset0, 202 LLVMValueRef *offset1, 203 LLVMValueRef *i0, 204 LLVMValueRef *i1) 205 { 206 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 207 LLVMBuilderRef builder = bld->gallivm->builder; 208 LLVMValueRef length_minus_one; 209 LLVMValueRef lmask, umask, mask; 210 211 /* 212 * If the pixel block covers more than one pixel then there is no easy 213 * way to calculate offset1 relative to offset0. Instead, compute them 214 * independently. Otherwise, try to compute offset0 and offset1 with 215 * a single stride multiplication. 
216 */ 217 218 length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); 219 220 if (block_length != 1) { 221 LLVMValueRef coord1; 222 switch(wrap_mode) { 223 case PIPE_TEX_WRAP_REPEAT: 224 if (is_pot) { 225 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 226 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); 227 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, ""); 228 } 229 else { 230 LLVMValueRef mask; 231 LLVMValueRef weight; 232 LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length); 233 lp_build_coord_repeat_npot_linear(bld, coord_f, 234 length, length_f, 235 &coord0, &weight); 236 mask = lp_build_compare(bld->gallivm, int_coord_bld->type, 237 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); 238 coord1 = LLVMBuildAnd(builder, 239 lp_build_add(int_coord_bld, coord0, 240 int_coord_bld->one), 241 mask, ""); 242 weight = lp_build_mul_imm(&bld->coord_bld, weight, 256); 243 *weight_i = lp_build_itrunc(&bld->coord_bld, weight); 244 } 245 break; 246 247 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 248 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 249 coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero, 250 length_minus_one); 251 coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero, 252 length_minus_one); 253 break; 254 255 case PIPE_TEX_WRAP_CLAMP: 256 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 257 case PIPE_TEX_WRAP_MIRROR_REPEAT: 258 case PIPE_TEX_WRAP_MIRROR_CLAMP: 259 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 260 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 261 default: 262 assert(0); 263 coord0 = int_coord_bld->zero; 264 coord1 = int_coord_bld->zero; 265 break; 266 } 267 lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride, 268 offset0, i0); 269 lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride, 270 offset1, i1); 271 return; 272 } 273 274 *i0 = int_coord_bld->zero; 275 *i1 = int_coord_bld->zero; 276 277 switch(wrap_mode) 
{ 278 case PIPE_TEX_WRAP_REPEAT: 279 if (is_pot) { 280 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); 281 } 282 else { 283 LLVMValueRef weight; 284 LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length); 285 lp_build_coord_repeat_npot_linear(bld, coord_f, 286 length, length_f, 287 &coord0, &weight); 288 weight = lp_build_mul_imm(&bld->coord_bld, weight, 256); 289 *weight_i = lp_build_itrunc(&bld->coord_bld, weight); 290 } 291 292 mask = lp_build_compare(bld->gallivm, int_coord_bld->type, 293 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); 294 295 *offset0 = lp_build_mul(int_coord_bld, coord0, stride); 296 *offset1 = LLVMBuildAnd(builder, 297 lp_build_add(int_coord_bld, *offset0, stride), 298 mask, ""); 299 break; 300 301 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 302 /* XXX this might be slower than the separate path 303 * on some newer cpus. With sse41 this is 8 instructions vs. 7 304 * - at least on SNB this is almost certainly slower since 305 * min/max are cheaper than selects, and the muls aren't bad. 
306 */ 307 lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, 308 PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero); 309 umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, 310 PIPE_FUNC_LESS, coord0, length_minus_one); 311 312 coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero); 313 coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one); 314 315 mask = LLVMBuildAnd(builder, lmask, umask, ""); 316 317 *offset0 = lp_build_mul(int_coord_bld, coord0, stride); 318 *offset1 = lp_build_add(int_coord_bld, 319 *offset0, 320 LLVMBuildAnd(builder, stride, mask, "")); 321 break; 322 323 case PIPE_TEX_WRAP_CLAMP: 324 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 325 case PIPE_TEX_WRAP_MIRROR_REPEAT: 326 case PIPE_TEX_WRAP_MIRROR_CLAMP: 327 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 328 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 329 default: 330 assert(0); 331 *offset0 = int_coord_bld->zero; 332 *offset1 = int_coord_bld->zero; 333 break; 334 } 335 } 336 337 338 /** 339 * Build LLVM code for texture coord wrapping, for linear filtering, 340 * for float texcoords. 
341 * \param block_length is the length of the pixel block along the 342 * coordinate axis 343 * \param coord the incoming texcoord (s,t,r or q) 344 * \param length the texture size along one dimension 345 * \param is_pot if TRUE, length is a power of two 346 * \param wrap_mode one of PIPE_TEX_WRAP_x 347 * \param coord0 the first texcoord after wrapping, as int 348 * \param coord1 the second texcoord after wrapping, as int 349 * \param weight the filter weight as int (0-255) 350 * \param force_nearest if this coord actually uses nearest filtering 351 */ 352 static void 353 lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld, 354 unsigned block_length, 355 LLVMValueRef coord, 356 LLVMValueRef length, 357 boolean is_pot, 358 unsigned wrap_mode, 359 LLVMValueRef *coord0, 360 LLVMValueRef *coord1, 361 LLVMValueRef *weight, 362 unsigned force_nearest) 363 { 364 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 365 struct lp_build_context *coord_bld = &bld->coord_bld; 366 LLVMBuilderRef builder = bld->gallivm->builder; 367 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); 368 LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); 369 370 switch(wrap_mode) { 371 case PIPE_TEX_WRAP_REPEAT: 372 if (is_pot) { 373 /* mul by size and subtract 0.5 */ 374 coord = lp_build_mul(coord_bld, coord, length); 375 if (!force_nearest) 376 coord = lp_build_sub(coord_bld, coord, half); 377 *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); 378 /* convert to int, compute lerp weight */ 379 lp_build_ifloor_fract(coord_bld, coord, coord0, weight); 380 *coord1 = lp_build_ifloor(coord_bld, *coord1); 381 /* repeat wrap */ 382 length_minus_one = lp_build_itrunc(coord_bld, length_minus_one); 383 *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, ""); 384 *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, ""); 385 } 386 else { 387 LLVMValueRef mask; 388 /* wrap with normalized floats is just 
fract */ 389 coord = lp_build_fract(coord_bld, coord); 390 /* unnormalize */ 391 coord = lp_build_mul(coord_bld, coord, length); 392 /* 393 * we avoided the 0.5/length division, have to fix up wrong 394 * edge cases with selects 395 */ 396 *coord1 = lp_build_add(coord_bld, coord, half); 397 coord = lp_build_sub(coord_bld, coord, half); 398 *weight = lp_build_fract(coord_bld, coord); 399 mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, 400 PIPE_FUNC_LESS, coord, coord_bld->zero); 401 *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord); 402 *coord0 = lp_build_itrunc(coord_bld, *coord0); 403 mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, 404 PIPE_FUNC_LESS, *coord1, length); 405 *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero); 406 *coord1 = lp_build_itrunc(coord_bld, *coord1); 407 } 408 break; 409 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 410 if (bld->static_state->normalized_coords) { 411 /* mul by tex size */ 412 coord = lp_build_mul(coord_bld, coord, length); 413 } 414 /* subtract 0.5 */ 415 if (!force_nearest) { 416 coord = lp_build_sub(coord_bld, coord, half); 417 } 418 /* clamp to [0, length - 1] */ 419 coord = lp_build_min(coord_bld, coord, length_minus_one); 420 coord = lp_build_max(coord_bld, coord, coord_bld->zero); 421 *coord1 = lp_build_add(coord_bld, coord, coord_bld->one); 422 /* convert to int, compute lerp weight */ 423 lp_build_ifloor_fract(coord_bld, coord, coord0, weight); 424 /* coord1 = min(coord1, length-1) */ 425 *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one); 426 *coord1 = lp_build_itrunc(coord_bld, *coord1); 427 break; 428 default: 429 assert(0); 430 *coord0 = int_coord_bld->zero; 431 *coord1 = int_coord_bld->zero; 432 *weight = coord_bld->zero; 433 break; 434 } 435 *weight = lp_build_mul_imm(coord_bld, *weight, 256); 436 *weight = lp_build_itrunc(coord_bld, *weight); 437 return; 438 } 439 440 441 /** 442 * Fetch texels for image with nearest sampling. 
443 * Return filtered color as two vectors of 16-bit fixed point values. 444 */ 445 static void 446 lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, 447 LLVMValueRef data_ptr, 448 LLVMValueRef offset, 449 LLVMValueRef x_subcoord, 450 LLVMValueRef y_subcoord, 451 LLVMValueRef *colors_lo, 452 LLVMValueRef *colors_hi) 453 { 454 /* 455 * Fetch the pixels as 4 x 32bit (rgba order might differ): 456 * 457 * rgba0 rgba1 rgba2 rgba3 458 * 459 * bit cast them into 16 x u8 460 * 461 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 462 * 463 * unpack them into two 8 x i16: 464 * 465 * r0 g0 b0 a0 r1 g1 b1 a1 466 * r2 g2 b2 a2 r3 g3 b3 a3 467 * 468 * The higher 8 bits of the resulting elements will be zero. 469 */ 470 LLVMBuilderRef builder = bld->gallivm->builder; 471 LLVMValueRef rgba8; 472 struct lp_build_context h16, u8n; 473 LLVMTypeRef u8n_vec_type; 474 475 lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width)); 476 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width)); 477 u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); 478 479 if (util_format_is_rgba8_variant(bld->format_desc)) { 480 /* 481 * Given the format is a rgba8, just read the pixels as is, 482 * without any swizzling. Swizzling will be done later. 483 */ 484 rgba8 = lp_build_gather(bld->gallivm, 485 bld->texel_type.length, 486 bld->format_desc->block.bits, 487 bld->texel_type.width, 488 data_ptr, offset); 489 490 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); 491 } 492 else { 493 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, 494 bld->format_desc, 495 u8n.type, 496 data_ptr, offset, 497 x_subcoord, 498 y_subcoord); 499 } 500 501 /* Expand one 4*rgba8 to two 2*rgba16 */ 502 lp_build_unpack2(bld->gallivm, u8n.type, h16.type, 503 rgba8, 504 colors_lo, colors_hi); 505 } 506 507 508 /** 509 * Sample a single texture image with nearest sampling. 510 * If sampling a cube texture, r = cube face in [0,5]. 
511 * Return filtered color as two vectors of 16-bit fixed point values. 512 */ 513 static void 514 lp_build_sample_image_nearest(struct lp_build_sample_context *bld, 515 LLVMValueRef int_size, 516 LLVMValueRef row_stride_vec, 517 LLVMValueRef img_stride_vec, 518 LLVMValueRef data_ptr, 519 LLVMValueRef s, 520 LLVMValueRef t, 521 LLVMValueRef r, 522 LLVMValueRef *colors_lo, 523 LLVMValueRef *colors_hi) 524 { 525 const unsigned dims = bld->dims; 526 LLVMBuilderRef builder = bld->gallivm->builder; 527 struct lp_build_context i32; 528 LLVMTypeRef i32_vec_type; 529 LLVMValueRef i32_c8; 530 LLVMValueRef width_vec, height_vec, depth_vec; 531 LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL; 532 LLVMValueRef s_float, t_float = NULL, r_float = NULL; 533 LLVMValueRef x_stride; 534 LLVMValueRef x_offset, offset; 535 LLVMValueRef x_subcoord, y_subcoord, z_subcoord; 536 537 lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width)); 538 539 i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type); 540 541 lp_build_extract_image_sizes(bld, 542 bld->int_size_type, 543 bld->int_coord_type, 544 int_size, 545 &width_vec, 546 &height_vec, 547 &depth_vec); 548 549 s_float = s; t_float = t; r_float = r; 550 551 if (bld->static_state->normalized_coords) { 552 LLVMValueRef scaled_size; 553 LLVMValueRef flt_size; 554 555 /* scale size by 256 (8 fractional bits) */ 556 scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8); 557 558 flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size); 559 560 lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r); 561 } 562 else { 563 /* scale coords by 256 (8 fractional bits) */ 564 s = lp_build_mul_imm(&bld->coord_bld, s, 256); 565 if (dims >= 2) 566 t = lp_build_mul_imm(&bld->coord_bld, t, 256); 567 if (dims >= 3) 568 r = lp_build_mul_imm(&bld->coord_bld, r, 256); 569 } 570 571 /* convert float to int */ 572 s = LLVMBuildFPToSI(builder, s, i32_vec_type, ""); 573 if (dims >= 2) 574 t = 
LLVMBuildFPToSI(builder, t, i32_vec_type, ""); 575 if (dims >= 3) 576 r = LLVMBuildFPToSI(builder, r, i32_vec_type, ""); 577 578 /* compute floor (shift right 8) */ 579 i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8); 580 s_ipart = LLVMBuildAShr(builder, s, i32_c8, ""); 581 if (dims >= 2) 582 t_ipart = LLVMBuildAShr(builder, t, i32_c8, ""); 583 if (dims >= 3) 584 r_ipart = LLVMBuildAShr(builder, r, i32_c8, ""); 585 586 /* get pixel, row, image strides */ 587 x_stride = lp_build_const_vec(bld->gallivm, 588 bld->int_coord_bld.type, 589 bld->format_desc->block.bits/8); 590 591 /* Do texcoord wrapping, compute texel offset */ 592 lp_build_sample_wrap_nearest_int(bld, 593 bld->format_desc->block.width, 594 s_ipart, s_float, 595 width_vec, x_stride, 596 bld->static_state->pot_width, 597 bld->static_state->wrap_s, 598 &x_offset, &x_subcoord); 599 offset = x_offset; 600 if (dims >= 2) { 601 LLVMValueRef y_offset; 602 lp_build_sample_wrap_nearest_int(bld, 603 bld->format_desc->block.height, 604 t_ipart, t_float, 605 height_vec, row_stride_vec, 606 bld->static_state->pot_height, 607 bld->static_state->wrap_t, 608 &y_offset, &y_subcoord); 609 offset = lp_build_add(&bld->int_coord_bld, offset, y_offset); 610 if (dims >= 3) { 611 LLVMValueRef z_offset; 612 lp_build_sample_wrap_nearest_int(bld, 613 1, /* block length (depth) */ 614 r_ipart, r_float, 615 depth_vec, img_stride_vec, 616 bld->static_state->pot_depth, 617 bld->static_state->wrap_r, 618 &z_offset, &z_subcoord); 619 offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); 620 } 621 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 622 LLVMValueRef z_offset; 623 /* The r coord is the cube face in [0,5] */ 624 z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec); 625 offset = lp_build_add(&bld->int_coord_bld, offset, z_offset); 626 } 627 } 628 629 lp_build_sample_fetch_image_nearest(bld, data_ptr, offset, 630 x_subcoord, y_subcoord, 631 colors_lo, colors_hi); 632 } 633 634 635 /** 636 
* Sample a single texture image with nearest sampling. 637 * If sampling a cube texture, r = cube face in [0,5]. 638 * Return filtered color as two vectors of 16-bit fixed point values. 639 * Does address calcs (except offsets) with floats. 640 * Useful for AVX which has support for 8x32 floats but not 8x32 ints. 641 */ 642 static void 643 lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld, 644 LLVMValueRef int_size, 645 LLVMValueRef row_stride_vec, 646 LLVMValueRef img_stride_vec, 647 LLVMValueRef data_ptr, 648 LLVMValueRef s, 649 LLVMValueRef t, 650 LLVMValueRef r, 651 LLVMValueRef *colors_lo, 652 LLVMValueRef *colors_hi) 653 { 654 const unsigned dims = bld->dims; 655 LLVMValueRef width_vec, height_vec, depth_vec; 656 LLVMValueRef offset; 657 LLVMValueRef x_subcoord, y_subcoord; 658 LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL; 659 LLVMValueRef flt_size; 660 661 flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size); 662 663 lp_build_extract_image_sizes(bld, 664 bld->float_size_type, 665 bld->coord_type, 666 flt_size, 667 &width_vec, 668 &height_vec, 669 &depth_vec); 670 671 /* Do texcoord wrapping */ 672 lp_build_sample_wrap_nearest_float(bld, 673 s, width_vec, 674 bld->static_state->pot_width, 675 bld->static_state->wrap_s, 676 &x_icoord); 677 678 if (dims >= 2) { 679 lp_build_sample_wrap_nearest_float(bld, 680 t, height_vec, 681 bld->static_state->pot_height, 682 bld->static_state->wrap_t, 683 &y_icoord); 684 685 if (dims >= 3) { 686 lp_build_sample_wrap_nearest_float(bld, 687 r, depth_vec, 688 bld->static_state->pot_depth, 689 bld->static_state->wrap_r, 690 &z_icoord); 691 } 692 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 693 z_icoord = r; 694 } 695 } 696 697 /* 698 * From here on we deal with ints, and we should split up the 256bit 699 * vectors manually for better generated code. 
700 */ 701 702 /* 703 * compute texel offsets - 704 * cannot do offset calc with floats, difficult for block-based formats, 705 * and not enough precision anyway. 706 */ 707 lp_build_sample_offset(&bld->int_coord_bld, 708 bld->format_desc, 709 x_icoord, y_icoord, 710 z_icoord, 711 row_stride_vec, img_stride_vec, 712 &offset, 713 &x_subcoord, &y_subcoord); 714 715 lp_build_sample_fetch_image_nearest(bld, data_ptr, offset, 716 x_subcoord, y_subcoord, 717 colors_lo, colors_hi); 718 } 719 720 721 /** 722 * Fetch texels for image with linear sampling. 723 * Return filtered color as two vectors of 16-bit fixed point values. 724 */ 725 static void 726 lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, 727 LLVMValueRef data_ptr, 728 LLVMValueRef offset[2][2][2], 729 LLVMValueRef x_subcoord[2], 730 LLVMValueRef y_subcoord[2], 731 LLVMValueRef s_fpart, 732 LLVMValueRef t_fpart, 733 LLVMValueRef r_fpart, 734 LLVMValueRef *colors_lo, 735 LLVMValueRef *colors_hi) 736 { 737 const unsigned dims = bld->dims; 738 LLVMBuilderRef builder = bld->gallivm->builder; 739 struct lp_build_context h16, u8n; 740 LLVMTypeRef h16_vec_type, u8n_vec_type; 741 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 742 LLVMValueRef shuffles_lo[LP_MAX_VECTOR_LENGTH]; 743 LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH]; 744 LLVMValueRef shuffle_lo, shuffle_hi; 745 LLVMValueRef s_fpart_lo, s_fpart_hi; 746 LLVMValueRef t_fpart_lo = NULL, t_fpart_hi = NULL; 747 LLVMValueRef r_fpart_lo = NULL, r_fpart_hi = NULL; 748 LLVMValueRef neighbors_lo[2][2][2]; /* [z][y][x] */ 749 LLVMValueRef neighbors_hi[2][2][2]; /* [z][y][x] */ 750 LLVMValueRef packed_lo, packed_hi; 751 unsigned i, j, k; 752 unsigned numj, numk; 753 754 lp_build_context_init(&h16, bld->gallivm, lp_type_ufixed(16, bld->vector_width)); 755 lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width)); 756 h16_vec_type = lp_build_vec_type(bld->gallivm, h16.type); 757 u8n_vec_type = 
lp_build_vec_type(bld->gallivm, u8n.type); 758 759 /* 760 * Transform 4 x i32 in 761 * 762 * s_fpart = {s0, s1, s2, s3} 763 * 764 * into 8 x i16 765 * 766 * s_fpart = {00, s0, 00, s1, 00, s2, 00, s3} 767 * 768 * into two 8 x i16 769 * 770 * s_fpart_lo = {s0, s0, s0, s0, s1, s1, s1, s1} 771 * s_fpart_hi = {s2, s2, s2, s2, s3, s3, s3, s3} 772 * 773 * and likewise for t_fpart. There is no risk of loosing precision here 774 * since the fractional parts only use the lower 8bits. 775 */ 776 s_fpart = LLVMBuildBitCast(builder, s_fpart, h16_vec_type, ""); 777 if (dims >= 2) 778 t_fpart = LLVMBuildBitCast(builder, t_fpart, h16_vec_type, ""); 779 if (dims >= 3) 780 r_fpart = LLVMBuildBitCast(builder, r_fpart, h16_vec_type, ""); 781 782 for (j = 0; j < h16.type.length; j += 4) { 783 #ifdef PIPE_ARCH_LITTLE_ENDIAN 784 unsigned subindex = 0; 785 #else 786 unsigned subindex = 1; 787 #endif 788 LLVMValueRef index; 789 790 index = LLVMConstInt(elem_type, j/2 + subindex, 0); 791 for (i = 0; i < 4; ++i) 792 shuffles_lo[j + i] = index; 793 794 index = LLVMConstInt(elem_type, h16.type.length/2 + j/2 + subindex, 0); 795 for (i = 0; i < 4; ++i) 796 shuffles_hi[j + i] = index; 797 } 798 799 shuffle_lo = LLVMConstVector(shuffles_lo, h16.type.length); 800 shuffle_hi = LLVMConstVector(shuffles_hi, h16.type.length); 801 802 s_fpart_lo = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, 803 shuffle_lo, ""); 804 s_fpart_hi = LLVMBuildShuffleVector(builder, s_fpart, h16.undef, 805 shuffle_hi, ""); 806 if (dims >= 2) { 807 t_fpart_lo = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, 808 shuffle_lo, ""); 809 t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, 810 shuffle_hi, ""); 811 } 812 if (dims >= 3) { 813 r_fpart_lo = LLVMBuildShuffleVector(builder, r_fpart, h16.undef, 814 shuffle_lo, ""); 815 r_fpart_hi = LLVMBuildShuffleVector(builder, r_fpart, h16.undef, 816 shuffle_hi, ""); 817 } 818 819 /* 820 * Fetch the pixels as 4 x 32bit (rgba order might differ): 821 * 822 * 
rgba0 rgba1 rgba2 rgba3 823 * 824 * bit cast them into 16 x u8 825 * 826 * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 827 * 828 * unpack them into two 8 x i16: 829 * 830 * r0 g0 b0 a0 r1 g1 b1 a1 831 * r2 g2 b2 a2 r3 g3 b3 a3 832 * 833 * The higher 8 bits of the resulting elements will be zero. 834 */ 835 numj = 1 + (dims >= 2); 836 numk = 1 + (dims >= 3); 837 838 for (k = 0; k < numk; k++) { 839 for (j = 0; j < numj; j++) { 840 for (i = 0; i < 2; i++) { 841 LLVMValueRef rgba8; 842 843 if (util_format_is_rgba8_variant(bld->format_desc)) { 844 /* 845 * Given the format is a rgba8, just read the pixels as is, 846 * without any swizzling. Swizzling will be done later. 847 */ 848 rgba8 = lp_build_gather(bld->gallivm, 849 bld->texel_type.length, 850 bld->format_desc->block.bits, 851 bld->texel_type.width, 852 data_ptr, offset[k][j][i]); 853 854 rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); 855 } 856 else { 857 rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, 858 bld->format_desc, 859 u8n.type, 860 data_ptr, offset[k][j][i], 861 x_subcoord[i], 862 y_subcoord[j]); 863 } 864 865 /* Expand one 4*rgba8 to two 2*rgba16 */ 866 lp_build_unpack2(bld->gallivm, u8n.type, h16.type, 867 rgba8, 868 &neighbors_lo[k][j][i], &neighbors_hi[k][j][i]); 869 } 870 } 871 } 872 873 /* 874 * Linear interpolation with 8.8 fixed point. 
875 */ 876 if (bld->static_state->force_nearest_s) { 877 /* special case 1-D lerp */ 878 packed_lo = lp_build_lerp(&h16, 879 t_fpart_lo, 880 neighbors_lo[0][0][0], 881 neighbors_lo[0][0][1]); 882 883 packed_hi = lp_build_lerp(&h16, 884 t_fpart_hi, 885 neighbors_hi[0][1][0], 886 neighbors_hi[0][1][0]); 887 } 888 else if (bld->static_state->force_nearest_t) { 889 /* special case 1-D lerp */ 890 packed_lo = lp_build_lerp(&h16, 891 s_fpart_lo, 892 neighbors_lo[0][0][0], 893 neighbors_lo[0][0][1]); 894 895 packed_hi = lp_build_lerp(&h16, 896 s_fpart_hi, 897 neighbors_hi[0][0][0], 898 neighbors_hi[0][0][1]); 899 } 900 else { 901 /* general 1/2/3-D lerping */ 902 if (dims == 1) { 903 packed_lo = lp_build_lerp(&h16, 904 s_fpart_lo, 905 neighbors_lo[0][0][0], 906 neighbors_lo[0][0][1]); 907 908 packed_hi = lp_build_lerp(&h16, 909 s_fpart_hi, 910 neighbors_hi[0][0][0], 911 neighbors_hi[0][0][1]); 912 } 913 else { 914 /* 2-D lerp */ 915 packed_lo = lp_build_lerp_2d(&h16, 916 s_fpart_lo, t_fpart_lo, 917 neighbors_lo[0][0][0], 918 neighbors_lo[0][0][1], 919 neighbors_lo[0][1][0], 920 neighbors_lo[0][1][1]); 921 922 packed_hi = lp_build_lerp_2d(&h16, 923 s_fpart_hi, t_fpart_hi, 924 neighbors_hi[0][0][0], 925 neighbors_hi[0][0][1], 926 neighbors_hi[0][1][0], 927 neighbors_hi[0][1][1]); 928 929 if (dims >= 3) { 930 LLVMValueRef packed_lo2, packed_hi2; 931 932 /* lerp in the second z slice */ 933 packed_lo2 = lp_build_lerp_2d(&h16, 934 s_fpart_lo, t_fpart_lo, 935 neighbors_lo[1][0][0], 936 neighbors_lo[1][0][1], 937 neighbors_lo[1][1][0], 938 neighbors_lo[1][1][1]); 939 940 packed_hi2 = lp_build_lerp_2d(&h16, 941 s_fpart_hi, t_fpart_hi, 942 neighbors_hi[1][0][0], 943 neighbors_hi[1][0][1], 944 neighbors_hi[1][1][0], 945 neighbors_hi[1][1][1]); 946 /* interp between two z slices */ 947 packed_lo = lp_build_lerp(&h16, r_fpart_lo, 948 packed_lo, packed_lo2); 949 packed_hi = lp_build_lerp(&h16, r_fpart_hi, 950 packed_hi, packed_hi2); 951 } 952 } 953 } 954 955 *colors_lo = packed_lo; 
   *colors_hi = packed_hi;
}


/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Coords are converted to 8.8 fixed point (scaled by 256): the integer
 * part (arithmetic shift right by 8) selects the texel pair and the low
 * 8 bits (AND 0xff) are the linear interpolation weights.
 *
 * \param int_size        (vector of) ints giving the mip level's width/height/depth
 * \param row_stride_vec  row stride in bytes (vector)
 * \param img_stride_vec  image/slice stride in bytes (vector)
 * \param data_ptr        base pointer to the mip level's texel data
 * \param s, t, r         incoming texcoords; for cube maps r holds the
 *                        face index in [0,5]
 * \param colors_lo, colors_hi  returned filtered colors as two vectors
 *                        of 16-bit fixed point values (low/high halves)
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             LLVMValueRef *colors_lo,
                             LLVMValueRef *colors_hi)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_float;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   unsigned x, y, z;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                bld->int_size_type,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* remember the original float coords; the wrap code below takes both
    * the scaled-int and the float versions of each coordinate
    */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      /* unnormalizing by the x256 size scales the coords by 256 too */
      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128), skipped on an axis when nearest filtering
    * is forced there (no half-texel centering needed in that case)
    */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   if (!bld->static_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, &s_fpart, s_float,
                                   width_vec, x_stride,
                                   bld->static_state->pot_width,
                                   bld->static_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);
   /* seed all 8 offsets with the two x offsets; y/z deltas added below */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, &t_fpart, t_float,
                                      height_vec, y_stride,
                                      bld->static_state->pot_height,
                                      bld->static_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      /* NOTE(review): block.height is passed as the block length for the
       * r/depth axis here — presumably fine since supported formats have
       * block depth 1, but confirm against lp_build_sample_wrap_linear_int.
       */
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      r_ipart, &r_fpart, r_float,
                                      depth_vec, z_stride,
                                      bld->static_state->pot_depth,
                                      bld->static_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }
   else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            /* The r coord is the cube face in [0,5] */
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset);
         }
      }
   }

   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors_lo, colors_hi);
}


/**
 * Sample a single
texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 *
 * \param int_size        (vector of) ints giving the mip level's width/height/depth
 * \param row_stride_vec  row stride in bytes (vector)
 * \param img_stride_vec  image/slice stride in bytes (vector)
 * \param data_ptr        base pointer to the mip level's texel data
 * \param s, t, r         incoming texcoords; for cube maps r holds the
 *                        face index in [0,5]
 * \param colors_lo, colors_hi  returned filtered colors as two vectors
 *                        of 16-bit fixed point values (low/high halves)
 */
static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                    LLVMValueRef int_size,
                                    LLVMValueRef row_stride_vec,
                                    LLVMValueRef img_stride_vec,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef s,
                                    LLVMValueRef t,
                                    LLVMValueRef r,
                                    LLVMValueRef *colors_lo,
                                    LLVMValueRef *colors_hi)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_fpart;
   LLVMValueRef t_fpart = NULL;
   LLVMValueRef r_fpart = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2];
   LLVMValueRef flt_size;
   LLVMValueRef x_icoord0, x_icoord1;
   LLVMValueRef y_icoord0, y_icoord1;
   LLVMValueRef z_icoord0, z_icoord1;
   unsigned x, y, z;

   /* sizes as floats, since the wrap code below works on float coords */
   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                bld->float_size_type,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_float(bld,
                                     bld->format_desc->block.width,
                                     s, width_vec,
                                     bld->static_state->pot_width,
                                     bld->static_state->wrap_s,
                                     &x_icoord0, &x_icoord1,
                                     &s_fpart,
                                     bld->static_state->force_nearest_s);

   if (dims >= 2) {
      lp_build_sample_wrap_linear_float(bld,
                                        bld->format_desc->block.height,
                                        t, height_vec,
                                        bld->static_state->pot_height,
                                        bld->static_state->wrap_t,
                                        &y_icoord0, &y_icoord1,
                                        &t_fpart,
                                        bld->static_state->force_nearest_t);

      if (dims >= 3) {
         /* NOTE(review): block.height is passed as the block length for
          * the r/depth axis — presumably fine since supported formats
          * have block depth 1; confirm against the wrap helper.
          */
         lp_build_sample_wrap_linear_float(bld,
                                           bld->format_desc->block.height,
                                           r, depth_vec,
                                           bld->static_state->pot_depth,
                                           bld->static_state->wrap_r,
                                           &z_icoord0, &z_icoord1,
                                           &r_fpart, 0);
      }
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /*
    * compute texel offset -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord0, x_stride,
                                  &x_offset0, &x_subcoord[0]);
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord1, x_stride,
                                  &x_offset1, &x_subcoord[1]);
   /* seed all 8 offsets with the two x offsets; y/z deltas added below */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord0, y_stride,
                                     &y_offset0, &y_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord1, y_stride,
                                     &y_offset1, &y_subcoord[1]);
      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      LLVMValueRef z_subcoord[2];   /* always 0 for block length 1; unused */
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord0, z_stride,
                                     &z_offset0, &z_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord1, z_stride,
                                     &z_offset1, &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }
   else if (bld->static_state->target == PIPE_TEXTURE_CUBE) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            /* The r coord is the cube face in [0,5] */
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset);
         }
      }
   }

   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors_lo, colors_hi);
}


/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * ilevel0 and ilevel1 indicate the two mipmap levels to sample from;
 * the per-level sizes, strides and data pointers are looked up from them.
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 *
 * \param img_filter  PIPE_TEX_FILTER_NEAREST or PIPE_TEX_FILTER_LINEAR
 * \param mip_filter  PIPE_TEX_MIPFILTER_x
 * \param s, t, r     texcoords
 * \param lod_fpart   per-quad float fractional lod, used for the mip lerp
 * \param colors_lo_var, colors_hi_var  variables the packed filtered
 *        colors are stored into (low/high halves)
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_lo_var,
                       LLVMValueRef colors_hi_var)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef colors0_lo, colors0_hi;
   LLVMValueRef colors1_lo, colors1_hi;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   /* prefer the float-address path on AVX for wide (>4) coord vectors */
   if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest_afloat(bld,
                                              size0,
                                              row_stride0_vec, img_stride0_vec,
                                              data_ptr0, s, t, r,
                                              &colors0_lo, &colors0_hi);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear_afloat(bld,
                                             size0,
                                             row_stride0_vec, img_stride0_vec,
                                             data_ptr0, s, t, r,
                                             &colors0_lo, &colors0_hi);
      }
   }
   else {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest(bld,
                                       size0,
                                       row_stride0_vec, img_stride0_vec,
                                       data_ptr0, s, t, r,
                                       &colors0_lo, &colors0_hi);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear(bld,
                                      size0,
                                      row_stride0_vec, img_stride0_vec,
                                      data_ptr0, s, t, r,
                                      &colors0_lo, &colors0_hi);
      }
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0_lo, colors_lo_var);
   LLVMBuildStore(builder, colors0_hi, colors_hi_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      /* convert the float lod_fpart to 8.8-style fixed point (x256) */
      LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
                                                     bld->perquadf_bld.type, 256.0);
      LLVMTypeRef i32vec_type = lp_build_vec_type(bld->gallivm, bld->perquadi_bld.type);
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      if (num_quads == 1) {
         need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                   lod_fpart, bld->perquadi_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
          * We'll do mip filtering if any of the quads need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it.
          */
         /*
          * We need to clamp lod_fpart here since we can get negative
          * values which would screw up filtering if not all
          * lod_fpart values have same sign.
          * We can however then skip the greater than comparison.
          */
         lod_fpart = lp_build_max(&bld->perquadi_bld, lod_fpart,
                                  bld->perquadi_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, lod_fpart);
      }

      /* runtime branch: only fetch/lerp the second level when needed */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         struct lp_build_context h16_bld;

         lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);

         if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest_afloat(bld,
                                                    size1,
                                                    row_stride1_vec, img_stride1_vec,
                                                    data_ptr1, s, t, r,
                                                    &colors1_lo, &colors1_hi);
            }
            else {
               lp_build_sample_image_linear_afloat(bld,
                                                   size1,
                                                   row_stride1_vec, img_stride1_vec,
                                                   data_ptr1, s, t, r,
                                                   &colors1_lo, &colors1_hi);
            }
         }
         else {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest(bld,
                                             size1,
                                             row_stride1_vec, img_stride1_vec,
                                             data_ptr1, s, t, r,
                                             &colors1_lo, &colors1_hi);
            }
            else {
               lp_build_sample_image_linear(bld,
                                            size1,
                                            row_stride1_vec, img_stride1_vec,
                                            data_ptr1, s, t, r,
                                            &colors1_lo, &colors1_hi);
            }
         }

         /* interpolate samples from the two mipmap levels */

         if (num_quads == 1) {
            /* single quad: one scalar weight broadcast over the 16-bit lanes */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, h16_bld.elem_type, "");
            lod_fpart = lp_build_broadcast_scalar(&h16_bld, lod_fpart);

#if HAVE_LLVM == 0x208
            /* This is a work-around for a bug in LLVM 2.8.
             * Evidently, something goes wrong in the construction of the
             * lod_fpart short[8] vector. Adding this no-effect shuffle seems
             * to force the vector to be properly constructed.
             * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
             */
            {
               LLVMValueRef shuffles[8], shuffle;
               assert(h16_bld.type.length <= Elements(shuffles));
               for (i = 0; i < h16_bld.type.length; i++)
                  shuffles[i] = lp_build_const_int32(bld->gallivm, 2 * (i & 1));
               shuffle = LLVMConstVector(shuffles, h16_bld.type.length);
               lod_fpart = LLVMBuildShuffleVector(builder,
                                                  lod_fpart, lod_fpart,
                                                  shuffle, "");
            }
#endif

            colors0_lo = lp_build_lerp(&h16_bld, lod_fpart,
                                       colors0_lo, colors1_lo);
            colors0_hi = lp_build_lerp(&h16_bld, lod_fpart,
                                       colors0_hi, colors1_hi);
         }
         else {
            /* multiple quads: broadcast each quad's weight into its half */
            LLVMValueRef lod_parts[LP_MAX_VECTOR_LENGTH/16];
            struct lp_type perquadi16_type = bld->perquadi_bld.type;
            perquadi16_type.width /= 2;
            perquadi16_type.length *= 2;
            lod_fpart = LLVMBuildBitCast(builder, lod_fpart,
                                         lp_build_vec_type(bld->gallivm,
                                                           perquadi16_type), "");
            /* XXX this only works for exactly 2 quads. More quads need shuffle */
            assert(num_quads == 2);
            for (i = 0; i < num_quads; i++) {
               LLVMValueRef indexi2 = lp_build_const_int32(bld->gallivm, i*2);
               lod_parts[i] = lp_build_extract_broadcast(bld->gallivm,
                                                         perquadi16_type,
                                                         h16_bld.type,
                                                         lod_fpart,
                                                         indexi2);
            }
            colors0_lo = lp_build_lerp(&h16_bld, lod_parts[0],
                                       colors0_lo, colors1_lo);
            colors0_hi = lp_build_lerp(&h16_bld, lod_parts[1],
                                       colors0_hi, colors1_hi);
         }

         LLVMBuildStore(builder, colors0_lo, colors_lo_var);
         LLVMBuildStore(builder, colors0_hi, colors_hi_var);
      }
      lp_build_endif(&if_ctx);
   }
}



/**
 * Texture sampling in AoS format. Used when sampling common 32-bit/texel
 * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes
 * but only limited texture coord wrap modes.
 *
 * \param unit        texture unit index (not referenced in this function
 *                    body — TODO confirm whether it is still needed)
 * \param s, t, r     texcoords
 * \param lod_ipart   integer part of the lod (per quad, int vector)
 * \param lod_fpart   fractional part of the lod, for the mip lerp
 * \param ilevel0, ilevel1  mipmap level indices to sample from
 * \param texel_out   returned r,g,b,a texel values as SoA float vectors
 */
void
lp_build_sample_aos(struct lp_build_sample_context *bld,
                    unsigned unit,
                    LLVMValueRef s,
                    LLVMValueRef t,
                    LLVMValueRef r,
                    LLVMValueRef lod_ipart,
                    LLVMValueRef lod_fpart,
                    LLVMValueRef ilevel0,
                    LLVMValueRef ilevel1,
                    LLVMValueRef texel_out[4])
{
   struct lp_build_context *int_bld = &bld->int_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   const unsigned mip_filter = bld->static_state->min_mip_filter;
   const unsigned min_filter = bld->static_state->min_img_filter;
   const unsigned mag_filter = bld->static_state->mag_img_filter;
   const unsigned dims = bld->dims;
   LLVMValueRef packed, packed_lo, packed_hi;
   LLVMValueRef unswizzled[4];
   struct lp_build_context h16_bld;

   /* we only support the common/simple wrap modes at this time */
   assert(lp_is_simple_wrap_mode(bld->static_state->wrap_s));
   if (dims >= 2)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_t));
   if (dims >= 3)
      assert(lp_is_simple_wrap_mode(bld->static_state->wrap_r));


   /* make 16-bit fixed-pt builder context */
   lp_build_context_init(&h16_bld, bld->gallivm, lp_type_ufixed(16, bld->vector_width));

   /*
    * Get/interpolate texture colors.
    */

   /* variables (allocas) written by lp_build_sample_mipmap; a variable is
    * needed because the writes may happen inside generated if/else blocks
    */
   packed_lo = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_lo");
   packed_hi = lp_build_alloca(bld->gallivm, h16_bld.vec_type, "packed_hi");

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld,
                             min_filter, mip_filter,
                             s, t, r,
                             ilevel0, ilevel1, lod_fpart,
                             packed_lo, packed_hi);
   }
   else {
      /* Emit conditional to choose min image filter or mag image filter
       * depending on the lod being > 0 or <= 0, respectively.
       */
      struct lp_build_if_state if_ctx;
      LLVMValueRef minify;

      /* minify = lod_ipart >= 0 */
      minify = LLVMBuildICmp(builder, LLVMIntSGE,
                             lod_ipart, int_bld->zero, "");

      lp_build_if(&if_ctx, bld->gallivm, minify);
      {
         /* Use the minification filter */
         lp_build_sample_mipmap(bld,
                                min_filter, mip_filter,
                                s, t, r,
                                ilevel0, ilevel1, lod_fpart,
                                packed_lo, packed_hi);
      }
      lp_build_else(&if_ctx);
      {
         /* Use the magnification filter */
         lp_build_sample_mipmap(bld,
                                mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                s, t, r,
                                ilevel0, NULL, NULL,
                                packed_lo, packed_hi);
      }
      lp_build_endif(&if_ctx);
   }

   /*
    * combine the values stored in 'packed_lo' and 'packed_hi' variables
    * into 'packed'
    */
   packed = lp_build_pack2(bld->gallivm,
                           h16_bld.type, lp_type_unorm(8, bld->vector_width),
                           LLVMBuildLoad(builder, packed_lo, ""),
                           LLVMBuildLoad(builder, packed_hi, ""));

   /*
    * Convert to SoA and swizzle.
    */
   lp_build_rgba8_to_f32_soa(bld->gallivm,
                             bld->texel_type,
                             packed, unswizzled);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      lp_build_format_swizzle_soa(bld->format_desc,
                                  &bld->texel_bld,
                                  unswizzled, texel_out);
   }
   else {
      texel_out[0] = unswizzled[0];
      texel_out[1] = unswizzled[1];
      texel_out[2] = unswizzled[2];
      texel_out[3] = unswizzled[3];
   }
}