1 /************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * @file 30 * Texture sampling -- SoA. 31 * 32 * @author Jose Fonseca <jfonseca (at) vmware.com> 33 * @author Brian Paul <brianp (at) vmware.com> 34 */ 35 36 #include "pipe/p_defines.h" 37 #include "pipe/p_state.h" 38 #include "pipe/p_shader_tokens.h" 39 #include "util/u_debug.h" 40 #include "util/u_dump.h" 41 #include "util/u_memory.h" 42 #include "util/u_math.h" 43 #include "util/u_format.h" 44 #include "util/u_cpu_detect.h" 45 #include "util/format_rgb9e5.h" 46 #include "lp_bld_debug.h" 47 #include "lp_bld_type.h" 48 #include "lp_bld_const.h" 49 #include "lp_bld_conv.h" 50 #include "lp_bld_arit.h" 51 #include "lp_bld_bitarit.h" 52 #include "lp_bld_logic.h" 53 #include "lp_bld_printf.h" 54 #include "lp_bld_swizzle.h" 55 #include "lp_bld_flow.h" 56 #include "lp_bld_gather.h" 57 #include "lp_bld_format.h" 58 #include "lp_bld_sample.h" 59 #include "lp_bld_sample_aos.h" 60 #include "lp_bld_struct.h" 61 #include "lp_bld_quad.h" 62 #include "lp_bld_pack.h" 63 #include "lp_bld_intr.h" 64 65 66 /** 67 * Generate code to fetch a texel from a texture at int coords (x, y, z). 68 * The computation depends on whether the texture is 1D, 2D or 3D. 69 * The result, texel, will be float vectors: 70 * texel[0] = red values 71 * texel[1] = green values 72 * texel[2] = blue values 73 * texel[3] = alpha values 74 */ 75 static void 76 lp_build_sample_texel_soa(struct lp_build_sample_context *bld, 77 LLVMValueRef width, 78 LLVMValueRef height, 79 LLVMValueRef depth, 80 LLVMValueRef x, 81 LLVMValueRef y, 82 LLVMValueRef z, 83 LLVMValueRef y_stride, 84 LLVMValueRef z_stride, 85 LLVMValueRef data_ptr, 86 LLVMValueRef mipoffsets, 87 LLVMValueRef texel_out[4]) 88 { 89 const struct lp_static_sampler_state *static_state = bld->static_sampler_state; 90 const unsigned dims = bld->dims; 91 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 92 LLVMBuilderRef builder = bld->gallivm->builder; 93 LLVMValueRef offset; 94 LLVMValueRef i, j; 95 LLVMValueRef use_border = NULL; 96 97 /* use_border = x < 0 || x >= width || y < 0 || y >= height */ 98 if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s, 99 static_state->min_img_filter, 100 static_state->mag_img_filter)) { 101 LLVMValueRef b1, b2; 102 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); 103 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); 104 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); 105 } 106 107 if (dims >= 2 && 108 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t, 109 static_state->min_img_filter, 110 static_state->mag_img_filter)) { 111 LLVMValueRef b1, b2; 112 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); 113 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); 114 if (use_border) { 115 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); 116 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); 117 } 118 else { 119 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); 120 } 121 } 122 123 if (dims == 3 && 124 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r, 125 static_state->min_img_filter, 126 static_state->mag_img_filter)) { 127 LLVMValueRef b1, b2; 128 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); 129 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); 130 if (use_border) { 131 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); 132 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); 133 } 134 else { 135 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); 136 } 137 } 138 139 /* convert x,y,z coords to linear offset from start of texture, in bytes */ 140 lp_build_sample_offset(&bld->int_coord_bld, 141 bld->format_desc, 142 x, y, z, y_stride, z_stride, 143 &offset, &i, &j); 144 if (mipoffsets) { 145 offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets); 146 } 147 148 if (use_border) { 149 /* If we can sample the border color, it means that texcoords may 150 * lie outside the bounds of the texture image. We need to do 151 * something to prevent reading out of bounds and causing a segfault. 152 * 153 * Simply AND the texture coords with !use_border. This will cause 154 * coords which are out of bounds to become zero. Zero's guaranteed 155 * to be inside the texture image. 156 */ 157 offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border); 158 } 159 160 lp_build_fetch_rgba_soa(bld->gallivm, 161 bld->format_desc, 162 bld->texel_type, TRUE, 163 data_ptr, offset, 164 i, j, 165 bld->cache, 166 texel_out); 167 168 /* 169 * Note: if we find an app which frequently samples the texture border 170 * we might want to implement a true conditional here to avoid sampling 171 * the texture whenever possible (since that's quite a bit of code). 172 * Ex: 173 * if (use_border) { 174 * texel = border_color; 175 * } 176 * else { 177 * texel = sample_texture(coord); 178 * } 179 * As it is now, we always sample the texture, then selectively replace 180 * the texel color results with the border color. 181 */ 182 183 if (use_border) { 184 /* select texel color or border color depending on use_border. */ 185 const struct util_format_description *format_desc = bld->format_desc; 186 int chan; 187 struct lp_type border_type = bld->texel_type; 188 border_type.length = 4; 189 /* 190 * Only replace channels which are actually present. The others should 191 * get optimized away eventually by sampler_view swizzle anyway but it's 192 * easier too. 193 */ 194 for (chan = 0; chan < 4; chan++) { 195 unsigned chan_s; 196 /* reverse-map channel... */ 197 for (chan_s = 0; chan_s < 4; chan_s++) { 198 if (chan_s == format_desc->swizzle[chan]) { 199 break; 200 } 201 } 202 if (chan_s <= 3) { 203 /* use the already clamped color */ 204 LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan); 205 LLVMValueRef border_chan; 206 207 border_chan = lp_build_extract_broadcast(bld->gallivm, 208 border_type, 209 bld->texel_type, 210 bld->border_color_clamped, 211 idx); 212 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border, 213 border_chan, texel_out[chan]); 214 } 215 } 216 } 217 } 218 219 220 /** 221 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode. 222 * (Note that with pot sizes could do this much more easily post-scale 223 * with some bit arithmetic.) 224 */ 225 static LLVMValueRef 226 lp_build_coord_mirror(struct lp_build_sample_context *bld, 227 LLVMValueRef coord, boolean posOnly) 228 { 229 struct lp_build_context *coord_bld = &bld->coord_bld; 230 LLVMValueRef fract; 231 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); 232 233 /* 234 * We can just use 2*(x - round(0.5*x)) to do all the mirroring, 235 * it all works out. (The result is in range [-1, 1.0], negative if 236 * the coord is in the "odd" section, otherwise positive.) 237 */ 238 239 coord = lp_build_mul(coord_bld, coord, half); 240 fract = lp_build_round(coord_bld, coord); 241 fract = lp_build_sub(coord_bld, coord, fract); 242 coord = lp_build_add(coord_bld, fract, fract); 243 244 if (posOnly) { 245 /* 246 * Theoretically it's not quite 100% accurate because the spec says 247 * that ultimately a scaled coord of -x.0 should map to int coord 248 * -x + 1 with mirroring, not -x (this does not matter for bilinear 249 * filtering). 250 */ 251 coord = lp_build_abs(coord_bld, coord); 252 /* kill off NaNs */ 253 /* XXX: not safe without arch rounding, fract can be anything. */ 254 coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero, 255 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 256 } 257 258 return coord; 259 } 260 261 262 /** 263 * Helper to compute the first coord and the weight for 264 * linear wrap repeat npot textures 265 */ 266 void 267 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld, 268 LLVMValueRef coord_f, 269 LLVMValueRef length_i, 270 LLVMValueRef length_f, 271 LLVMValueRef *coord0_i, 272 LLVMValueRef *weight_f) 273 { 274 struct lp_build_context *coord_bld = &bld->coord_bld; 275 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 276 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); 277 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i, 278 int_coord_bld->one); 279 LLVMValueRef mask; 280 /* wrap with normalized floats is just fract */ 281 coord_f = lp_build_fract(coord_bld, coord_f); 282 /* mul by size and subtract 0.5 */ 283 coord_f = lp_build_mul(coord_bld, coord_f, length_f); 284 coord_f = lp_build_sub(coord_bld, coord_f, half); 285 /* 286 * we avoided the 0.5/length division before the repeat wrap, 287 * now need to fix up edge cases with selects 288 */ 289 /* 290 * Note we do a float (unordered) compare so we can eliminate NaNs. 291 * (Otherwise would need fract_safe above). 292 */ 293 mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, 294 PIPE_FUNC_LESS, coord_f, coord_bld->zero); 295 296 /* convert to int, compute lerp weight */ 297 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f); 298 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i); 299 } 300 301 302 /** 303 * Build LLVM code for texture wrap mode for linear filtering. 304 * \param x0_out returns first integer texcoord 305 * \param x1_out returns second integer texcoord 306 * \param weight_out returns linear interpolation weight 307 */ 308 static void 309 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, 310 boolean is_gather, 311 LLVMValueRef coord, 312 LLVMValueRef length, 313 LLVMValueRef length_f, 314 LLVMValueRef offset, 315 boolean is_pot, 316 unsigned wrap_mode, 317 LLVMValueRef *x0_out, 318 LLVMValueRef *x1_out, 319 LLVMValueRef *weight_out) 320 { 321 struct lp_build_context *coord_bld = &bld->coord_bld; 322 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 323 LLVMBuilderRef builder = bld->gallivm->builder; 324 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); 325 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); 326 LLVMValueRef coord0, coord1, weight; 327 328 switch(wrap_mode) { 329 case PIPE_TEX_WRAP_REPEAT: 330 if (is_pot) { 331 /* mul by size and subtract 0.5 */ 332 coord = lp_build_mul(coord_bld, coord, length_f); 333 coord = lp_build_sub(coord_bld, coord, half); 334 if (offset) { 335 offset = lp_build_int_to_float(coord_bld, offset); 336 coord = lp_build_add(coord_bld, coord, offset); 337 } 338 /* convert to int, compute lerp weight */ 339 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 340 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 341 /* repeat wrap */ 342 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); 343 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, ""); 344 } 345 else { 346 LLVMValueRef mask; 347 if (offset) { 348 offset = lp_build_int_to_float(coord_bld, offset); 349 offset = lp_build_div(coord_bld, offset, length_f); 350 coord = lp_build_add(coord_bld, coord, offset); 351 } 352 lp_build_coord_repeat_npot_linear(bld, coord, 353 length, length_f, 354 &coord0, &weight); 355 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, 356 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); 357 coord1 = LLVMBuildAnd(builder, 358 lp_build_add(int_coord_bld, coord0, int_coord_bld->one), 359 mask, ""); 360 } 361 break; 362 363 case PIPE_TEX_WRAP_CLAMP: 364 if (bld->static_sampler_state->normalized_coords) { 365 /* scale coord to length */ 366 coord = lp_build_mul(coord_bld, coord, length_f); 367 } 368 if (offset) { 369 offset = lp_build_int_to_float(coord_bld, offset); 370 coord = lp_build_add(coord_bld, coord, offset); 371 } 372 373 /* 374 * clamp to [0, length] 375 * 376 * Unlike some other wrap modes, this should be correct for gather 377 * too. GL_CLAMP explicitly does this clamp on the coord prior to 378 * actual wrapping (which is per sample). 379 */ 380 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f); 381 382 coord = lp_build_sub(coord_bld, coord, half); 383 384 /* convert to int, compute lerp weight */ 385 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 386 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 387 break; 388 389 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 390 { 391 struct lp_build_context abs_coord_bld = bld->coord_bld; 392 abs_coord_bld.type.sign = FALSE; 393 394 if (bld->static_sampler_state->normalized_coords) { 395 /* mul by tex size */ 396 coord = lp_build_mul(coord_bld, coord, length_f); 397 } 398 if (offset) { 399 offset = lp_build_int_to_float(coord_bld, offset); 400 coord = lp_build_add(coord_bld, coord, offset); 401 } 402 403 /* clamp to length max */ 404 coord = lp_build_min_ext(coord_bld, coord, length_f, 405 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 406 if (!is_gather) { 407 /* subtract 0.5 */ 408 coord = lp_build_sub(coord_bld, coord, half); 409 /* clamp to [0, length - 0.5] */ 410 coord = lp_build_max(coord_bld, coord, coord_bld->zero); 411 /* convert to int, compute lerp weight */ 412 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); 413 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 414 } else { 415 /* 416 * The non-gather path will end up with coords 0, 1 if coord was 417 * smaller than 0.5 (with corresponding weight 0.0 so it doesn't 418 * really matter what the second coord is). But for gather, we 419 * really need to end up with coords 0, 0. 420 */ 421 coord = lp_build_max(coord_bld, coord, coord_bld->zero); 422 coord0 = lp_build_sub(coord_bld, coord, half); 423 coord1 = lp_build_add(coord_bld, coord, half); 424 /* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5] */ 425 coord0 = lp_build_itrunc(coord_bld, coord0); 426 coord1 = lp_build_itrunc(coord_bld, coord1); 427 weight = coord_bld->undef; 428 } 429 /* coord1 = min(coord1, length-1) */ 430 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 431 break; 432 } 433 434 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 435 if (bld->static_sampler_state->normalized_coords) { 436 /* scale coord to length */ 437 coord = lp_build_mul(coord_bld, coord, length_f); 438 } 439 if (offset) { 440 offset = lp_build_int_to_float(coord_bld, offset); 441 coord = lp_build_add(coord_bld, coord, offset); 442 } 443 /* 444 * We don't need any clamp. Technically, for very large (pos or neg) 445 * (or infinite) values, clamp against [-length, length] would be 446 * correct, but we don't need to guarantee any specific 447 * result for such coords (the ifloor will be undefined, but for modes 448 * requiring border all resulting coords are safe). 449 */ 450 coord = lp_build_sub(coord_bld, coord, half); 451 /* convert to int, compute lerp weight */ 452 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 453 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 454 break; 455 456 case PIPE_TEX_WRAP_MIRROR_REPEAT: 457 if (offset) { 458 offset = lp_build_int_to_float(coord_bld, offset); 459 offset = lp_build_div(coord_bld, offset, length_f); 460 coord = lp_build_add(coord_bld, coord, offset); 461 } 462 if (!is_gather) { 463 /* compute mirror function */ 464 coord = lp_build_coord_mirror(bld, coord, TRUE); 465 466 /* scale coord to length */ 467 coord = lp_build_mul(coord_bld, coord, length_f); 468 coord = lp_build_sub(coord_bld, coord, half); 469 470 /* convert to int, compute lerp weight */ 471 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 472 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 473 474 /* coord0 = max(coord0, 0) */ 475 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); 476 /* coord1 = min(coord1, length-1) */ 477 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 478 } else { 479 /* 480 * This is pretty reasonable in the end, all what the tests care 481 * about is nasty edge cases (scaled coords x.5, so the individual 482 * coords are actually integers, which is REALLY tricky to get right 483 * due to this working differently both for negative numbers as well 484 * as for even/odd cases). But with enough magic it's not too complex 485 * after all. 486 * Maybe should try a bit arithmetic one though for POT textures... 487 */ 488 LLVMValueRef isNeg; 489 /* 490 * Wrapping just once still works, even though it means we can 491 * get "wrong" sign due to performing mirror in the middle of the 492 * two coords (because this can only happen very near the odd/even 493 * edges, so both coords will actually end up as 0 or length - 1 494 * in the end). 495 * For GL4 gather with per-sample offsets we'd need to the mirroring 496 * per coord too. 497 */ 498 coord = lp_build_coord_mirror(bld, coord, FALSE); 499 coord = lp_build_mul(coord_bld, coord, length_f); 500 501 /* 502 * NaNs should be safe here, we'll do away with them with 503 * the ones' complement plus min. 504 */ 505 coord0 = lp_build_sub(coord_bld, coord, half); 506 coord0 = lp_build_ifloor(coord_bld, coord0); 507 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 508 /* ones complement for neg numbers (mirror(negX) = X - 1) */ 509 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, 510 coord0, int_coord_bld->zero); 511 coord0 = lp_build_xor(int_coord_bld, coord0, isNeg); 512 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, 513 coord1, int_coord_bld->zero); 514 coord1 = lp_build_xor(int_coord_bld, coord1, isNeg); 515 coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one); 516 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 517 518 weight = coord_bld->undef; 519 } 520 break; 521 522 case PIPE_TEX_WRAP_MIRROR_CLAMP: 523 if (bld->static_sampler_state->normalized_coords) { 524 /* scale coord to length */ 525 coord = lp_build_mul(coord_bld, coord, length_f); 526 } 527 if (offset) { 528 offset = lp_build_int_to_float(coord_bld, offset); 529 coord = lp_build_add(coord_bld, coord, offset); 530 } 531 /* 532 * XXX: probably not correct for gather, albeit I'm not 533 * entirely sure as it's poorly specified. The wrapping looks 534 * correct according to the spec which is against gl 1.2.1, 535 * however negative values will be swapped - gl re-specified 536 * wrapping with newer versions (no more pre-clamp except with 537 * GL_CLAMP). 538 */ 539 coord = lp_build_abs(coord_bld, coord); 540 541 /* clamp to [0, length] */ 542 coord = lp_build_min_ext(coord_bld, coord, length_f, 543 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 544 545 coord = lp_build_sub(coord_bld, coord, half); 546 547 /* convert to int, compute lerp weight */ 548 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 549 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 550 break; 551 552 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 553 { 554 struct lp_build_context abs_coord_bld = bld->coord_bld; 555 abs_coord_bld.type.sign = FALSE; 556 557 if (bld->static_sampler_state->normalized_coords) { 558 /* scale coord to length */ 559 coord = lp_build_mul(coord_bld, coord, length_f); 560 } 561 if (offset) { 562 offset = lp_build_int_to_float(coord_bld, offset); 563 coord = lp_build_add(coord_bld, coord, offset); 564 } 565 if (!is_gather) { 566 coord = lp_build_abs(coord_bld, coord); 567 568 /* clamp to length max */ 569 coord = lp_build_min_ext(coord_bld, coord, length_f, 570 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 571 /* subtract 0.5 */ 572 coord = lp_build_sub(coord_bld, coord, half); 573 /* clamp to [0, length - 0.5] */ 574 coord = lp_build_max(coord_bld, coord, coord_bld->zero); 575 576 /* convert to int, compute lerp weight */ 577 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); 578 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 579 /* coord1 = min(coord1, length-1) */ 580 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 581 } else { 582 /* 583 * The non-gather path will swap coord0/1 if coord was negative, 584 * which is ok for filtering since the filter weight matches 585 * accordingly. Also, if coord is close to zero, coord0/1 will 586 * be 0 and 1, instead of 0 and 0 (again ok due to filter 587 * weight being 0.0). Both issues need to be fixed for gather. 588 */ 589 LLVMValueRef isNeg; 590 591 /* 592 * Actually wanted to cheat here and use: 593 * coord1 = lp_build_iround(coord_bld, coord); 594 * but it's not good enough for some tests (even piglit 595 * textureGather is set up in a way so the coords area always 596 * .5, that is right at the crossover points). 597 * So do ordinary sub/floor, then do ones' complement 598 * for negative numbers. 599 * (Note can't just do sub|add/abs/itrunc per coord neither - 600 * because the spec demands that mirror(3.0) = 3 but 601 * mirror(-3.0) = 2.) 602 */ 603 coord = lp_build_sub(coord_bld, coord, half); 604 coord0 = lp_build_ifloor(coord_bld, coord); 605 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 606 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0, 607 int_coord_bld->zero); 608 coord0 = lp_build_xor(int_coord_bld, isNeg, coord0); 609 coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one); 610 611 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1, 612 int_coord_bld->zero); 613 coord1 = lp_build_xor(int_coord_bld, isNeg, coord1); 614 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 615 616 weight = coord_bld->undef; 617 } 618 } 619 break; 620 621 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 622 { 623 if (bld->static_sampler_state->normalized_coords) { 624 /* scale coord to length */ 625 coord = lp_build_mul(coord_bld, coord, length_f); 626 } 627 if (offset) { 628 offset = lp_build_int_to_float(coord_bld, offset); 629 coord = lp_build_add(coord_bld, coord, offset); 630 } 631 /* 632 * XXX: probably not correct for gather due to swapped 633 * order if coord is negative (same rationale as for 634 * MIRROR_CLAMP). 635 */ 636 coord = lp_build_abs(coord_bld, coord); 637 638 /* 639 * We don't need any clamp. Technically, for very large 640 * (or infinite) values, clamp against length would be 641 * correct, but we don't need to guarantee any specific 642 * result for such coords (the ifloor will be undefined, but 643 * for modes requiring border all resulting coords are safe). 644 */ 645 coord = lp_build_sub(coord_bld, coord, half); 646 647 /* convert to int, compute lerp weight */ 648 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 649 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 650 } 651 break; 652 653 default: 654 assert(0); 655 coord0 = NULL; 656 coord1 = NULL; 657 weight = NULL; 658 } 659 660 *x0_out = coord0; 661 *x1_out = coord1; 662 *weight_out = weight; 663 } 664 665 666 /** 667 * Build LLVM code for texture wrap mode for nearest filtering. 668 * \param coord the incoming texcoord (nominally in [0,1]) 669 * \param length the texture size along one dimension, as int vector 670 * \param length_f the texture size along one dimension, as float vector 671 * \param offset texel offset along one dimension (as int vector) 672 * \param is_pot if TRUE, length is a power of two 673 * \param wrap_mode one of PIPE_TEX_WRAP_x 674 */ 675 static LLVMValueRef 676 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, 677 LLVMValueRef coord, 678 LLVMValueRef length, 679 LLVMValueRef length_f, 680 LLVMValueRef offset, 681 boolean is_pot, 682 unsigned wrap_mode) 683 { 684 struct lp_build_context *coord_bld = &bld->coord_bld; 685 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 686 LLVMBuilderRef builder = bld->gallivm->builder; 687 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); 688 LLVMValueRef icoord; 689 690 switch(wrap_mode) { 691 case PIPE_TEX_WRAP_REPEAT: 692 if (is_pot) { 693 coord = lp_build_mul(coord_bld, coord, length_f); 694 icoord = lp_build_ifloor(coord_bld, coord); 695 if (offset) { 696 icoord = lp_build_add(int_coord_bld, icoord, offset); 697 } 698 icoord = LLVMBuildAnd(builder, icoord, length_minus_one, ""); 699 } 700 else { 701 if (offset) { 702 offset = lp_build_int_to_float(coord_bld, offset); 703 offset = lp_build_div(coord_bld, offset, length_f); 704 coord = lp_build_add(coord_bld, coord, offset); 705 } 706 /* take fraction, unnormalize */ 707 coord = lp_build_fract_safe(coord_bld, coord); 708 coord = lp_build_mul(coord_bld, coord, length_f); 709 icoord = lp_build_itrunc(coord_bld, coord); 710 } 711 break; 712 713 case PIPE_TEX_WRAP_CLAMP: 714 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 715 if (bld->static_sampler_state->normalized_coords) { 716 /* scale coord to length */ 717 coord = lp_build_mul(coord_bld, coord, length_f); 718 } 719 720 if (offset) { 721 offset = lp_build_int_to_float(coord_bld, offset); 722 coord = lp_build_add(coord_bld, coord, offset); 723 } 724 /* floor */ 725 /* use itrunc instead since we clamp to 0 anyway */ 726 icoord = lp_build_itrunc(coord_bld, coord); 727 728 /* clamp to [0, length - 1]. */ 729 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, 730 length_minus_one); 731 break; 732 733 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 734 if (bld->static_sampler_state->normalized_coords) { 735 /* scale coord to length */ 736 coord = lp_build_mul(coord_bld, coord, length_f); 737 } 738 /* no clamp necessary, border masking will handle this */ 739 icoord = lp_build_ifloor(coord_bld, coord); 740 if (offset) { 741 icoord = lp_build_add(int_coord_bld, icoord, offset); 742 } 743 break; 744 745 case PIPE_TEX_WRAP_MIRROR_REPEAT: 746 if (offset) { 747 offset = lp_build_int_to_float(coord_bld, offset); 748 offset = lp_build_div(coord_bld, offset, length_f); 749 coord = lp_build_add(coord_bld, coord, offset); 750 } 751 /* compute mirror function */ 752 coord = lp_build_coord_mirror(bld, coord, TRUE); 753 754 /* scale coord to length */ 755 assert(bld->static_sampler_state->normalized_coords); 756 coord = lp_build_mul(coord_bld, coord, length_f); 757 758 /* itrunc == ifloor here */ 759 icoord = lp_build_itrunc(coord_bld, coord); 760 761 /* clamp to [0, length - 1] */ 762 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); 763 break; 764 765 case PIPE_TEX_WRAP_MIRROR_CLAMP: 766 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 767 if (bld->static_sampler_state->normalized_coords) { 768 /* scale coord to length */ 769 coord = lp_build_mul(coord_bld, coord, length_f); 770 } 771 if (offset) { 772 offset = lp_build_int_to_float(coord_bld, offset); 773 coord = lp_build_add(coord_bld, coord, offset); 774 } 775 coord = lp_build_abs(coord_bld, coord); 776 777 /* itrunc == ifloor here */ 778 icoord = lp_build_itrunc(coord_bld, coord); 779 /* 780 * Use unsigned min due to possible undef values (NaNs, overflow) 781 */ 782 { 783 struct lp_build_context abs_coord_bld = *int_coord_bld; 784 abs_coord_bld.type.sign = FALSE; 785 /* clamp to [0, length - 1] */ 786 icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one); 787 } 788 break; 789 790 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 791 if (bld->static_sampler_state->normalized_coords) { 792 /* scale coord to length */ 793 coord = lp_build_mul(coord_bld, coord, length_f); 794 } 795 if (offset) { 796 offset = lp_build_int_to_float(coord_bld, offset); 797 coord = lp_build_add(coord_bld, coord, offset); 798 } 799 coord = lp_build_abs(coord_bld, coord); 800 801 /* itrunc == ifloor here */ 802 icoord = lp_build_itrunc(coord_bld, coord); 803 break; 804 805 default: 806 assert(0); 807 icoord = NULL; 808 } 809 810 return icoord; 811 } 812 813 814 /** 815 * Do shadow test/comparison. 816 * \param p shadow ref value 817 * \param texel the texel to compare against 818 */ 819 static LLVMValueRef 820 lp_build_sample_comparefunc(struct lp_build_sample_context *bld, 821 LLVMValueRef p, 822 LLVMValueRef texel) 823 { 824 struct lp_build_context *texel_bld = &bld->texel_bld; 825 LLVMValueRef res; 826 827 if (0) { 828 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p); 829 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel); 830 } 831 832 /* result = (p FUNC texel) ? 1 : 0 */ 833 /* 834 * honor d3d10 floating point rules here, which state that comparisons 835 * are ordered except NOT_EQUAL which is unordered. 836 */ 837 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) { 838 res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func, 839 p, texel); 840 } 841 else { 842 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func, 843 p, texel); 844 } 845 return res; 846 } 847 848 849 /** 850 * Generate code to sample a mipmap level with nearest filtering. 851 * If sampling a cube texture, r = cube face in [0,5]. 852 */ 853 static void 854 lp_build_sample_image_nearest(struct lp_build_sample_context *bld, 855 LLVMValueRef size, 856 LLVMValueRef row_stride_vec, 857 LLVMValueRef img_stride_vec, 858 LLVMValueRef data_ptr, 859 LLVMValueRef mipoffsets, 860 const LLVMValueRef *coords, 861 const LLVMValueRef *offsets, 862 LLVMValueRef colors_out[4]) 863 { 864 const unsigned dims = bld->dims; 865 LLVMValueRef width_vec; 866 LLVMValueRef height_vec; 867 LLVMValueRef depth_vec; 868 LLVMValueRef flt_size; 869 LLVMValueRef flt_width_vec; 870 LLVMValueRef flt_height_vec; 871 LLVMValueRef flt_depth_vec; 872 LLVMValueRef x, y = NULL, z = NULL; 873 874 lp_build_extract_image_sizes(bld, 875 &bld->int_size_bld, 876 bld->int_coord_type, 877 size, 878 &width_vec, &height_vec, &depth_vec); 879 880 flt_size = lp_build_int_to_float(&bld->float_size_bld, size); 881 882 lp_build_extract_image_sizes(bld, 883 &bld->float_size_bld, 884 bld->coord_type, 885 flt_size, 886 &flt_width_vec, &flt_height_vec, &flt_depth_vec); 887 888 /* 889 * Compute integer texcoords. 890 */ 891 x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec, 892 flt_width_vec, offsets[0], 893 bld->static_texture_state->pot_width, 894 bld->static_sampler_state->wrap_s); 895 lp_build_name(x, "tex.x.wrapped"); 896 897 if (dims >= 2) { 898 y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec, 899 flt_height_vec, offsets[1], 900 bld->static_texture_state->pot_height, 901 bld->static_sampler_state->wrap_t); 902 lp_build_name(y, "tex.y.wrapped"); 903 904 if (dims == 3) { 905 z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec, 906 flt_depth_vec, offsets[2], 907 bld->static_texture_state->pot_depth, 908 bld->static_sampler_state->wrap_r); 909 lp_build_name(z, "tex.z.wrapped"); 910 } 911 } 912 if (has_layer_coord(bld->static_texture_state->target)) { 913 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 914 /* add cube layer to face */ 915 z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); 916 } 917 else { 918 z = coords[2]; 919 } 920 lp_build_name(z, "tex.z.layer"); 921 } 922 923 /* 924 * Get texture colors. 925 */ 926 lp_build_sample_texel_soa(bld, 927 width_vec, height_vec, depth_vec, 928 x, y, z, 929 row_stride_vec, img_stride_vec, 930 data_ptr, mipoffsets, colors_out); 931 932 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) { 933 LLVMValueRef cmpval; 934 cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]); 935 /* this is really just a AND 1.0, cmpval but llvm is clever enough */ 936 colors_out[0] = lp_build_select(&bld->texel_bld, cmpval, 937 bld->texel_bld.one, bld->texel_bld.zero); 938 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; 939 } 940 941 } 942 943 944 /** 945 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly. 946 */ 947 static LLVMValueRef 948 lp_build_masklerp(struct lp_build_context *bld, 949 LLVMValueRef weight, 950 LLVMValueRef mask0, 951 LLVMValueRef mask1) 952 { 953 struct gallivm_state *gallivm = bld->gallivm; 954 LLVMBuilderRef builder = gallivm->builder; 955 LLVMValueRef weight2; 956 957 weight2 = lp_build_sub(bld, bld->one, weight); 958 weight = LLVMBuildBitCast(builder, weight, 959 lp_build_int_vec_type(gallivm, bld->type), ""); 960 weight2 = LLVMBuildBitCast(builder, weight2, 961 lp_build_int_vec_type(gallivm, bld->type), ""); 962 weight = LLVMBuildAnd(builder, weight, mask1, ""); 963 weight2 = LLVMBuildAnd(builder, weight2, mask0, ""); 964 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, ""); 965 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, ""); 966 return lp_build_add(bld, weight, weight2); 967 } 968 969 /** 970 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly. 971 */ 972 static LLVMValueRef 973 lp_build_masklerp2d(struct lp_build_context *bld, 974 LLVMValueRef weight0, 975 LLVMValueRef weight1, 976 LLVMValueRef mask00, 977 LLVMValueRef mask01, 978 LLVMValueRef mask10, 979 LLVMValueRef mask11) 980 { 981 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01); 982 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11); 983 return lp_build_lerp(bld, weight1, val0, val1, 0); 984 } 985 986 /* 987 * this is a bit excessive code for something OpenGL just recommends 988 * but does not require. 989 */ 990 #define ACCURATE_CUBE_CORNERS 1 991 992 /** 993 * Generate code to sample a mipmap level with linear filtering. 994 * If sampling a cube texture, r = cube face in [0,5]. 995 * If linear_mask is present, only pixels having their mask set 996 * will receive linear filtering, the rest will use nearest. 997 */ 998 static void 999 lp_build_sample_image_linear(struct lp_build_sample_context *bld, 1000 boolean is_gather, 1001 LLVMValueRef size, 1002 LLVMValueRef linear_mask, 1003 LLVMValueRef row_stride_vec, 1004 LLVMValueRef img_stride_vec, 1005 LLVMValueRef data_ptr, 1006 LLVMValueRef mipoffsets, 1007 const LLVMValueRef *coords, 1008 const LLVMValueRef *offsets, 1009 LLVMValueRef colors_out[4]) 1010 { 1011 LLVMBuilderRef builder = bld->gallivm->builder; 1012 struct lp_build_context *ivec_bld = &bld->int_coord_bld; 1013 struct lp_build_context *coord_bld = &bld->coord_bld; 1014 struct lp_build_context *texel_bld = &bld->texel_bld; 1015 const unsigned dims = bld->dims; 1016 LLVMValueRef width_vec; 1017 LLVMValueRef height_vec; 1018 LLVMValueRef depth_vec; 1019 LLVMValueRef flt_size; 1020 LLVMValueRef flt_width_vec; 1021 LLVMValueRef flt_height_vec; 1022 LLVMValueRef flt_depth_vec; 1023 LLVMValueRef fall_off[4], have_corners; 1024 LLVMValueRef z1 = NULL; 1025 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL; 1026 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL; 1027 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL; 1028 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL; 1029 LLVMValueRef xs[4], ys[4], zs[4]; 1030 LLVMValueRef neighbors[2][2][4]; 1031 int chan, texel_index; 1032 boolean seamless_cube_filter, accurate_cube_corners; 1033 unsigned chan_swiz = bld->static_texture_state->swizzle_r; 1034 1035 seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE || 1036 bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && 1037 bld->static_sampler_state->seamless_cube_map; 1038 1039 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter; 1040 1041 lp_build_extract_image_sizes(bld, 1042 &bld->int_size_bld, 1043 bld->int_coord_type, 1044 size, 1045 &width_vec, &height_vec, &depth_vec); 1046 1047 flt_size = lp_build_int_to_float(&bld->float_size_bld, size); 1048 1049 lp_build_extract_image_sizes(bld, 1050 &bld->float_size_bld, 1051 bld->coord_type, 1052 flt_size, 1053 &flt_width_vec, &flt_height_vec, &flt_depth_vec); 1054 1055 /* 1056 * Compute integer texcoords. 1057 */ 1058 1059 if (!seamless_cube_filter) { 1060 lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec, 1061 flt_width_vec, offsets[0], 1062 bld->static_texture_state->pot_width, 1063 bld->static_sampler_state->wrap_s, 1064 &x00, &x01, &s_fpart); 1065 lp_build_name(x00, "tex.x0.wrapped"); 1066 lp_build_name(x01, "tex.x1.wrapped"); 1067 x10 = x00; 1068 x11 = x01; 1069 1070 if (dims >= 2) { 1071 lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec, 1072 flt_height_vec, offsets[1], 1073 bld->static_texture_state->pot_height, 1074 bld->static_sampler_state->wrap_t, 1075 &y00, &y10, &t_fpart); 1076 lp_build_name(y00, "tex.y0.wrapped"); 1077 lp_build_name(y10, "tex.y1.wrapped"); 1078 y01 = y00; 1079 y11 = y10; 1080 1081 if (dims == 3) { 1082 lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec, 1083 flt_depth_vec, offsets[2], 1084 bld->static_texture_state->pot_depth, 1085 bld->static_sampler_state->wrap_r, 1086 &z00, &z1, &r_fpart); 1087 z01 = z10 = z11 = z00; 1088 lp_build_name(z00, "tex.z0.wrapped"); 1089 lp_build_name(z1, "tex.z1.wrapped"); 1090 } 1091 } 1092 if (has_layer_coord(bld->static_texture_state->target)) { 1093 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 1094 /* add cube layer to face */ 1095 z00 = z01 = z10 = z11 = z1 = 1096 lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); 1097 } 1098 else { 1099 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */ 1100 } 1101 lp_build_name(z00, "tex.z0.layer"); 1102 lp_build_name(z1, "tex.z1.layer"); 1103 } 1104 } 1105 else { 1106 struct lp_build_if_state edge_if; 1107 LLVMTypeRef int1t; 1108 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2]; 1109 LLVMValueRef coord0, coord1, have_edge, have_corner; 1110 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y; 1111 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp; 1112 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped; 1113 LLVMValueRef face = coords[2]; 1114 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f); 1115 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one); 1116 /* XXX drop height calcs. Could (should) do this without seamless filtering too */ 1117 height_vec = width_vec; 1118 flt_height_vec = flt_width_vec; 1119 1120 /* XXX the overflow logic is actually sort of duplicated with trilinear, 1121 * since an overflow in one mip should also have a corresponding overflow 1122 * in another. 1123 */ 1124 /* should always have normalized coords, and offsets are undefined */ 1125 assert(bld->static_sampler_state->normalized_coords); 1126 /* 1127 * The coords should all be between [0,1] however we can have NaNs, 1128 * which will wreak havoc. In particular the y1_clamped value below 1129 * can be -INT_MAX (on x86) and be propagated right through (probably 1130 * other values might be bogus in the end too). 1131 * So kill off the NaNs here. 1132 */ 1133 coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero, 1134 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 1135 coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec); 1136 /* instead of clamp, build mask if overflowed */ 1137 coord0 = lp_build_sub(coord_bld, coord0, half); 1138 /* convert to int, compute lerp weight */ 1139 /* not ideal with AVX (and no AVX2) */ 1140 lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart); 1141 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one); 1142 coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero, 1143 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 1144 coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec); 1145 coord1 = lp_build_sub(coord_bld, coord1, half); 1146 lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart); 1147 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one); 1148 1149 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero); 1150 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one); 1151 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero); 1152 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one); 1153 1154 fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]); 1155 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]); 1156 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y); 1157 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge); 1158 1159 /* needed for accurate corner filtering branch later, rely on 0 init */ 1160 int1t = LLVMInt1TypeInContext(bld->gallivm->context); 1161 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner"); 1162 1163 for (texel_index = 0; texel_index < 4; texel_index++) { 1164 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs"); 1165 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys"); 1166 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs"); 1167 } 1168 1169 lp_build_if(&edge_if, bld->gallivm, have_edge); 1170 1171 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y); 1172 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner); 1173 LLVMBuildStore(builder, have_corner, have_corners); 1174 1175 /* 1176 * Need to feed clamped values here for cheap corner handling, 1177 * but only for y coord (as when falling off both edges we only 1178 * fall off the x one) - this should be sufficient. 1179 */ 1180 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero); 1181 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one); 1182 1183 /* 1184 * Get all possible new coords. 1185 */ 1186 lp_build_cube_new_coords(ivec_bld, face, 1187 x0, x1, y0_clamped, y1_clamped, 1188 length_minus_one, 1189 new_faces, new_xcoords, new_ycoords); 1190 1191 /* handle fall off x-, x+ direction */ 1192 /* determine new coords, face (not both fall_off vars can be true at same time) */ 1193 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0); 1194 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped); 1195 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0); 1196 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped); 1197 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1); 1198 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped); 1199 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1); 1200 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped); 1201 1202 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face); 1203 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face); 1204 1205 /* handle fall off y-, y+ direction */ 1206 /* 1207 * Cheap corner logic: just hack up things so a texel doesn't fall 1208 * off both sides (which means filter weights will be wrong but we'll only 1209 * use valid texels in the filter). 1210 * This means however (y) coords must additionally be clamped (see above). 1211 * This corner handling should be fully OpenGL (but not d3d10) compliant. 1212 */ 1213 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]); 1214 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]); 1215 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]); 1216 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]); 1217 1218 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00); 1219 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00); 1220 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01); 1221 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01); 1222 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10); 1223 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10); 1224 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11); 1225 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11); 1226 1227 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00); 1228 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01); 1229 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10); 1230 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11); 1231 1232 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 1233 /* now can add cube layer to face (per sample) */ 1234 z00 = lp_build_add(ivec_bld, z00, coords[3]); 1235 z01 = lp_build_add(ivec_bld, z01, coords[3]); 1236 z10 = lp_build_add(ivec_bld, z10, coords[3]); 1237 z11 = lp_build_add(ivec_bld, z11, coords[3]); 1238 } 1239 1240 LLVMBuildStore(builder, x00, xs[0]); 1241 LLVMBuildStore(builder, x01, xs[1]); 1242 LLVMBuildStore(builder, x10, xs[2]); 1243 LLVMBuildStore(builder, x11, xs[3]); 1244 LLVMBuildStore(builder, y00, ys[0]); 1245 LLVMBuildStore(builder, y01, ys[1]); 1246 LLVMBuildStore(builder, y10, ys[2]); 1247 LLVMBuildStore(builder, y11, ys[3]); 1248 LLVMBuildStore(builder, z00, zs[0]); 1249 LLVMBuildStore(builder, z01, zs[1]); 1250 LLVMBuildStore(builder, z10, zs[2]); 1251 LLVMBuildStore(builder, z11, zs[3]); 1252 1253 lp_build_else(&edge_if); 1254 1255 LLVMBuildStore(builder, x0, xs[0]); 1256 LLVMBuildStore(builder, x1, xs[1]); 1257 LLVMBuildStore(builder, x0, xs[2]); 1258 LLVMBuildStore(builder, x1, xs[3]); 1259 LLVMBuildStore(builder, y0, ys[0]); 1260 LLVMBuildStore(builder, y0, ys[1]); 1261 LLVMBuildStore(builder, y1, ys[2]); 1262 LLVMBuildStore(builder, y1, ys[3]); 1263 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 1264 LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]); 1265 LLVMBuildStore(builder, cube_layer, zs[0]); 1266 LLVMBuildStore(builder, cube_layer, zs[1]); 1267 LLVMBuildStore(builder, cube_layer, zs[2]); 1268 LLVMBuildStore(builder, cube_layer, zs[3]); 1269 } 1270 else { 1271 LLVMBuildStore(builder, face, zs[0]); 1272 LLVMBuildStore(builder, face, zs[1]); 1273 LLVMBuildStore(builder, face, zs[2]); 1274 LLVMBuildStore(builder, face, zs[3]); 1275 } 1276 1277 lp_build_endif(&edge_if); 1278 1279 x00 = LLVMBuildLoad(builder, xs[0], ""); 1280 x01 = LLVMBuildLoad(builder, xs[1], ""); 1281 x10 = LLVMBuildLoad(builder, xs[2], ""); 1282 x11 = LLVMBuildLoad(builder, xs[3], ""); 1283 y00 = LLVMBuildLoad(builder, ys[0], ""); 1284 y01 = LLVMBuildLoad(builder, ys[1], ""); 1285 y10 = LLVMBuildLoad(builder, ys[2], ""); 1286 y11 = LLVMBuildLoad(builder, ys[3], ""); 1287 z00 = LLVMBuildLoad(builder, zs[0], ""); 1288 z01 = LLVMBuildLoad(builder, zs[1], ""); 1289 z10 = LLVMBuildLoad(builder, zs[2], ""); 1290 z11 = LLVMBuildLoad(builder, zs[3], ""); 1291 } 1292 1293 if (linear_mask) { 1294 /* 1295 * Whack filter weights into place. Whatever texel had more weight is 1296 * the one which should have been selected by nearest filtering hence 1297 * just use 100% weight for it. 1298 */ 1299 struct lp_build_context *c_bld = &bld->coord_bld; 1300 LLVMValueRef w1_mask, w1_weight; 1301 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f); 1302 1303 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half); 1304 /* this select is really just a "and" */ 1305 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero); 1306 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight); 1307 if (dims >= 2) { 1308 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half); 1309 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero); 1310 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight); 1311 if (dims == 3) { 1312 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half); 1313 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero); 1314 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight); 1315 } 1316 } 1317 } 1318 1319 /* 1320 * Get texture colors. 1321 */ 1322 /* get x0/x1 texels */ 1323 lp_build_sample_texel_soa(bld, 1324 width_vec, height_vec, depth_vec, 1325 x00, y00, z00, 1326 row_stride_vec, img_stride_vec, 1327 data_ptr, mipoffsets, neighbors[0][0]); 1328 lp_build_sample_texel_soa(bld, 1329 width_vec, height_vec, depth_vec, 1330 x01, y01, z01, 1331 row_stride_vec, img_stride_vec, 1332 data_ptr, mipoffsets, neighbors[0][1]); 1333 1334 if (dims == 1) { 1335 assert(!is_gather); 1336 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { 1337 /* Interpolate two samples from 1D image to produce one color */ 1338 for (chan = 0; chan < 4; chan++) { 1339 colors_out[chan] = lp_build_lerp(texel_bld, s_fpart, 1340 neighbors[0][0][chan], 1341 neighbors[0][1][chan], 1342 0); 1343 } 1344 } 1345 else { 1346 LLVMValueRef cmpval0, cmpval1; 1347 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); 1348 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); 1349 /* simplified lerp, AND mask with weight and add */ 1350 colors_out[0] = lp_build_masklerp(texel_bld, s_fpart, 1351 cmpval0, cmpval1); 1352 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; 1353 } 1354 } 1355 else { 1356 /* 2D/3D texture */ 1357 struct lp_build_if_state corner_if; 1358 LLVMValueRef colors0[4], colorss[4]; 1359 1360 /* get x0/x1 texels at y1 */ 1361 lp_build_sample_texel_soa(bld, 1362 width_vec, height_vec, depth_vec, 1363 x10, y10, z10, 1364 row_stride_vec, img_stride_vec, 1365 data_ptr, mipoffsets, neighbors[1][0]); 1366 lp_build_sample_texel_soa(bld, 1367 width_vec, height_vec, depth_vec, 1368 x11, y11, z11, 1369 row_stride_vec, img_stride_vec, 1370 data_ptr, mipoffsets, neighbors[1][1]); 1371 1372 /* 1373 * To avoid having to duplicate linear_mask / fetch code use 1374 * another branch (with corner condition though edge would work 1375 * as well) here. 1376 */ 1377 if (accurate_cube_corners) { 1378 LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f; 1379 LLVMValueRef have_corner, one_third; 1380 1381 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0"); 1382 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1"); 1383 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2"); 1384 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3"); 1385 1386 have_corner = LLVMBuildLoad(builder, have_corners, ""); 1387 1388 lp_build_if(&corner_if, bld->gallivm, have_corner); 1389 1390 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1391 1.0f/3.0f); 1392 1393 /* find corner */ 1394 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]); 1395 c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, ""); 1396 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]); 1397 c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, ""); 1398 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]); 1399 c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, ""); 1400 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]); 1401 c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, ""); 1402 1403 if (!is_gather) { 1404 /* 1405 * we can't use standard 2d lerp as we need per-element weight 1406 * in case of corners, so just calculate bilinear result as 1407 * w00*s00 + w01*s01 + w10*s10 + w11*s11. 1408 * (This is actually less work than using 2d lerp, 7 vs. 9 1409 * instructions, however calculating the weights needs another 6, 1410 * so actually probably not slower than 2d lerp only for 4 channels 1411 * as weights only need to be calculated once - of course fixing 1412 * the weights has additional cost.) 1413 */ 1414 LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp; 1415 wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart); 1416 wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart); 1417 w00 = lp_build_mul(coord_bld, wx0, wy0); 1418 w01 = lp_build_mul(coord_bld, s_fpart, wy0); 1419 w10 = lp_build_mul(coord_bld, wx0, t_fpart); 1420 w11 = lp_build_mul(coord_bld, s_fpart, t_fpart); 1421 1422 /* find corner weight */ 1423 c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero); 1424 c_weight = lp_build_select(coord_bld, c01, w01, c_weight); 1425 c_weight = lp_build_select(coord_bld, c10, w10, c_weight); 1426 c_weight = lp_build_select(coord_bld, c11, w11, c_weight); 1427 1428 /* 1429 * add 1/3 of the corner weight to the weight of the 3 other 1430 * samples and null out corner weight. 1431 */ 1432 c_weight = lp_build_mul(coord_bld, c_weight, one_third); 1433 w00 = lp_build_add(coord_bld, w00, c_weight); 1434 w00 = lp_build_andnot(coord_bld, w00, c00f); 1435 w01 = lp_build_add(coord_bld, w01, c_weight); 1436 w01 = lp_build_andnot(coord_bld, w01, c01f); 1437 w10 = lp_build_add(coord_bld, w10, c_weight); 1438 w10 = lp_build_andnot(coord_bld, w10, c10f); 1439 w11 = lp_build_add(coord_bld, w11, c_weight); 1440 w11 = lp_build_andnot(coord_bld, w11, c11f); 1441 1442 if (bld->static_sampler_state->compare_mode == 1443 PIPE_TEX_COMPARE_NONE) { 1444 for (chan = 0; chan < 4; chan++) { 1445 colors0[chan] = lp_build_mul(coord_bld, w00, 1446 neighbors[0][0][chan]); 1447 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]); 1448 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); 1449 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]); 1450 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); 1451 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]); 1452 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); 1453 } 1454 } 1455 else { 1456 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; 1457 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], 1458 neighbors[0][0][0]); 1459 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], 1460 neighbors[0][1][0]); 1461 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], 1462 neighbors[1][0][0]); 1463 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], 1464 neighbors[1][1][0]); 1465 /* 1466 * inputs to interpolation are just masks so just add 1467 * masked weights together 1468 */ 1469 cmpval00 = LLVMBuildBitCast(builder, cmpval00, 1470 coord_bld->vec_type, ""); 1471 cmpval01 = LLVMBuildBitCast(builder, cmpval01, 1472 coord_bld->vec_type, ""); 1473 cmpval10 = LLVMBuildBitCast(builder, cmpval10, 1474 coord_bld->vec_type, ""); 1475 cmpval11 = LLVMBuildBitCast(builder, cmpval11, 1476 coord_bld->vec_type, ""); 1477 colors0[0] = lp_build_and(coord_bld, w00, cmpval00); 1478 tmp = lp_build_and(coord_bld, w01, cmpval01); 1479 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); 1480 tmp = lp_build_and(coord_bld, w10, cmpval10); 1481 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); 1482 tmp = lp_build_and(coord_bld, w11, cmpval11); 1483 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); 1484 colors0[1] = colors0[2] = colors0[3] = colors0[0]; 1485 } 1486 } 1487 else { 1488 /* 1489 * We don't have any weights to adjust, so instead calculate 1490 * the fourth texel as simply the average of the other 3. 1491 * (This would work for non-gather too, however we'd have 1492 * a boatload more of the select stuff due to there being 1493 * 4 times as many colors as weights.) 1494 */ 1495 LLVMValueRef col00, col01, col10, col11; 1496 LLVMValueRef colc, colc0, colc1; 1497 col10 = lp_build_swizzle_soa_channel(texel_bld, 1498 neighbors[1][0], chan_swiz); 1499 col11 = lp_build_swizzle_soa_channel(texel_bld, 1500 neighbors[1][1], chan_swiz); 1501 col01 = lp_build_swizzle_soa_channel(texel_bld, 1502 neighbors[0][1], chan_swiz); 1503 col00 = lp_build_swizzle_soa_channel(texel_bld, 1504 neighbors[0][0], chan_swiz); 1505 1506 /* 1507 * The spec says for comparison filtering, the comparison 1508 * must happen before synthesizing the new value. 1509 * This means all gathered values are always 0 or 1, 1510 * except for the non-existing texel, which can be 0,1/3,2/3,1... 1511 * Seems like we'd be allowed to just return 0 or 1 too, so we 1512 * could simplify and pass down the compare mask values to the 1513 * end (using int arithmetic/compare on the mask values to 1514 * construct the fourth texel) and only there convert to floats 1515 * but it's probably not worth it (it might be easier for the cpu 1516 * but not for the code)... 1517 */ 1518 if (bld->static_sampler_state->compare_mode != 1519 PIPE_TEX_COMPARE_NONE) { 1520 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; 1521 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00); 1522 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01); 1523 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10); 1524 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11); 1525 col00 = lp_build_select(texel_bld, cmpval00, 1526 texel_bld->one, texel_bld->zero); 1527 col01 = lp_build_select(texel_bld, cmpval01, 1528 texel_bld->one, texel_bld->zero); 1529 col10 = lp_build_select(texel_bld, cmpval10, 1530 texel_bld->one, texel_bld->zero); 1531 col11 = lp_build_select(texel_bld, cmpval11, 1532 texel_bld->one, texel_bld->zero); 1533 } 1534 1535 /* 1536 * Null out corner color. 1537 */ 1538 col00 = lp_build_andnot(coord_bld, col00, c00f); 1539 col01 = lp_build_andnot(coord_bld, col01, c01f); 1540 col10 = lp_build_andnot(coord_bld, col10, c10f); 1541 col11 = lp_build_andnot(coord_bld, col11, c11f); 1542 1543 /* 1544 * New corner texel color is all colors added / 3. 1545 */ 1546 colc0 = lp_build_add(coord_bld, col00, col01); 1547 colc1 = lp_build_add(coord_bld, col10, col11); 1548 colc = lp_build_add(coord_bld, colc0, colc1); 1549 colc = lp_build_mul(coord_bld, one_third, colc); 1550 1551 /* 1552 * Replace the corner texel color with the new value. 1553 */ 1554 col00 = lp_build_select(coord_bld, c00, colc, col00); 1555 col01 = lp_build_select(coord_bld, c01, colc, col01); 1556 col10 = lp_build_select(coord_bld, c10, colc, col10); 1557 col11 = lp_build_select(coord_bld, c11, colc, col11); 1558 1559 colors0[0] = col10; 1560 colors0[1] = col11; 1561 colors0[2] = col01; 1562 colors0[3] = col00; 1563 } 1564 1565 LLVMBuildStore(builder, colors0[0], colorss[0]); 1566 LLVMBuildStore(builder, colors0[1], colorss[1]); 1567 LLVMBuildStore(builder, colors0[2], colorss[2]); 1568 LLVMBuildStore(builder, colors0[3], colorss[3]); 1569 1570 lp_build_else(&corner_if); 1571 } 1572 1573 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { 1574 if (is_gather) { 1575 /* 1576 * Just assign the red channel (no component selection yet). 1577 * This is a bit hackish, we usually do the swizzle at the 1578 * end of sampling (much less values to swizzle), but this 1579 * obviously cannot work when using gather. 1580 */ 1581 colors0[0] = lp_build_swizzle_soa_channel(texel_bld, 1582 neighbors[1][0], 1583 chan_swiz); 1584 colors0[1] = lp_build_swizzle_soa_channel(texel_bld, 1585 neighbors[1][1], 1586 chan_swiz); 1587 colors0[2] = lp_build_swizzle_soa_channel(texel_bld, 1588 neighbors[0][1], 1589 chan_swiz); 1590 colors0[3] = lp_build_swizzle_soa_channel(texel_bld, 1591 neighbors[0][0], 1592 chan_swiz); 1593 } 1594 else { 1595 /* Bilinear interpolate the four samples from the 2D image / 3D slice */ 1596 for (chan = 0; chan < 4; chan++) { 1597 colors0[chan] = lp_build_lerp_2d(texel_bld, 1598 s_fpart, t_fpart, 1599 neighbors[0][0][chan], 1600 neighbors[0][1][chan], 1601 neighbors[1][0][chan], 1602 neighbors[1][1][chan], 1603 0); 1604 } 1605 } 1606 } 1607 else { 1608 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; 1609 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); 1610 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); 1611 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]); 1612 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]); 1613 1614 if (is_gather) { 1615 /* more hacks for swizzling, should be X, ONE or ZERO... */ 1616 colors0[0] = lp_build_select(texel_bld, cmpval10, 1617 texel_bld->one, texel_bld->zero); 1618 colors0[1] = lp_build_select(texel_bld, cmpval11, 1619 texel_bld->one, texel_bld->zero); 1620 colors0[2] = lp_build_select(texel_bld, cmpval01, 1621 texel_bld->one, texel_bld->zero); 1622 colors0[3] = lp_build_select(texel_bld, cmpval00, 1623 texel_bld->one, texel_bld->zero); 1624 } 1625 else { 1626 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart, 1627 cmpval00, cmpval01, cmpval10, cmpval11); 1628 colors0[1] = colors0[2] = colors0[3] = colors0[0]; 1629 } 1630 } 1631 1632 if (accurate_cube_corners) { 1633 LLVMBuildStore(builder, colors0[0], colorss[0]); 1634 LLVMBuildStore(builder, colors0[1], colorss[1]); 1635 LLVMBuildStore(builder, colors0[2], colorss[2]); 1636 LLVMBuildStore(builder, colors0[3], colorss[3]); 1637 1638 lp_build_endif(&corner_if); 1639 1640 colors0[0] = LLVMBuildLoad(builder, colorss[0], ""); 1641 colors0[1] = LLVMBuildLoad(builder, colorss[1], ""); 1642 colors0[2] = LLVMBuildLoad(builder, colorss[2], ""); 1643 colors0[3] = LLVMBuildLoad(builder, colorss[3], ""); 1644 } 1645 1646 if (dims == 3) { 1647 LLVMValueRef neighbors1[2][2][4]; 1648 LLVMValueRef colors1[4]; 1649 1650 assert(!is_gather); 1651 1652 /* get x0/x1/y0/y1 texels at z1 */ 1653 lp_build_sample_texel_soa(bld, 1654 width_vec, height_vec, depth_vec, 1655 x00, y00, z1, 1656 row_stride_vec, img_stride_vec, 1657 data_ptr, mipoffsets, neighbors1[0][0]); 1658 lp_build_sample_texel_soa(bld, 1659 width_vec, height_vec, depth_vec, 1660 x01, y01, z1, 1661 row_stride_vec, img_stride_vec, 1662 data_ptr, mipoffsets, neighbors1[0][1]); 1663 lp_build_sample_texel_soa(bld, 1664 width_vec, height_vec, depth_vec, 1665 x10, y10, z1, 1666 row_stride_vec, img_stride_vec, 1667 data_ptr, mipoffsets, neighbors1[1][0]); 1668 lp_build_sample_texel_soa(bld, 1669 width_vec, height_vec, depth_vec, 1670 x11, y11, z1, 1671 row_stride_vec, img_stride_vec, 1672 data_ptr, mipoffsets, neighbors1[1][1]); 1673 1674 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { 1675 /* Bilinear interpolate the four samples from the second Z slice */ 1676 for (chan = 0; chan < 4; chan++) { 1677 colors1[chan] = lp_build_lerp_2d(texel_bld, 1678 s_fpart, t_fpart, 1679 neighbors1[0][0][chan], 1680 neighbors1[0][1][chan], 1681 neighbors1[1][0][chan], 1682 neighbors1[1][1][chan], 1683 0); 1684 } 1685 /* Linearly interpolate the two samples from the two 3D slices */ 1686 for (chan = 0; chan < 4; chan++) { 1687 colors_out[chan] = lp_build_lerp(texel_bld, 1688 r_fpart, 1689 colors0[chan], colors1[chan], 1690 0); 1691 } 1692 } 1693 else { 1694 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; 1695 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); 1696 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); 1697 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]); 1698 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]); 1699 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart, 1700 cmpval00, cmpval01, cmpval10, cmpval11); 1701 /* Linearly interpolate the two samples from the two 3D slices */ 1702 colors_out[0] = lp_build_lerp(texel_bld, 1703 r_fpart, 1704 colors0[0], colors1[0], 1705 0); 1706 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; 1707 } 1708 } 1709 else { 1710 /* 2D tex */ 1711 for (chan = 0; chan < 4; chan++) { 1712 colors_out[chan] = colors0[chan]; 1713 } 1714 } 1715 } 1716 if (is_gather) { 1717 /* 1718 * For gather, we can't do our usual channel swizzling done later, 1719 * so do it here. It only really matters for 0/1 swizzles in case 1720 * of comparison filtering, since in this case the results would be 1721 * wrong, without comparison it should all work out alright but it 1722 * can't hurt to do that here, since it will instantly drop all 1723 * calculations above, though it's a rather stupid idea to do 1724 * gather on a channel which will always return 0 or 1 in any case... 1725 */ 1726 if (chan_swiz == PIPE_SWIZZLE_1) { 1727 for (chan = 0; chan < 4; chan++) { 1728 colors_out[chan] = texel_bld->one; 1729 } 1730 } else if (chan_swiz == PIPE_SWIZZLE_0) { 1731 for (chan = 0; chan < 4; chan++) { 1732 colors_out[chan] = texel_bld->zero; 1733 } 1734 } 1735 } 1736 } 1737 1738 1739 /** 1740 * Sample the texture/mipmap using given image filter and mip filter. 1741 * ilevel0 and ilevel1 indicate the two mipmap levels to sample 1742 * from (vectors or scalars). 1743 * If we're using nearest miplevel sampling the '1' values will be null/unused. 1744 */ 1745 static void 1746 lp_build_sample_mipmap(struct lp_build_sample_context *bld, 1747 unsigned img_filter, 1748 unsigned mip_filter, 1749 boolean is_gather, 1750 const LLVMValueRef *coords, 1751 const LLVMValueRef *offsets, 1752 LLVMValueRef ilevel0, 1753 LLVMValueRef ilevel1, 1754 LLVMValueRef lod_fpart, 1755 LLVMValueRef *colors_out) 1756 { 1757 LLVMBuilderRef builder = bld->gallivm->builder; 1758 LLVMValueRef size0 = NULL; 1759 LLVMValueRef size1 = NULL; 1760 LLVMValueRef row_stride0_vec = NULL; 1761 LLVMValueRef row_stride1_vec = NULL; 1762 LLVMValueRef img_stride0_vec = NULL; 1763 LLVMValueRef img_stride1_vec = NULL; 1764 LLVMValueRef data_ptr0 = NULL; 1765 LLVMValueRef data_ptr1 = NULL; 1766 LLVMValueRef mipoff0 = NULL; 1767 LLVMValueRef mipoff1 = NULL; 1768 LLVMValueRef colors0[4], colors1[4]; 1769 unsigned chan; 1770 1771 /* sample the first mipmap level */ 1772 lp_build_mipmap_level_sizes(bld, ilevel0, 1773 &size0, 1774 &row_stride0_vec, &img_stride0_vec); 1775 if (bld->num_mips == 1) { 1776 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); 1777 } 1778 else { 1779 /* This path should work for num_lods 1 too but slightly less efficient */ 1780 data_ptr0 = bld->base_ptr; 1781 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); 1782 } 1783 if (img_filter == PIPE_TEX_FILTER_NEAREST) { 1784 lp_build_sample_image_nearest(bld, size0, 1785 row_stride0_vec, img_stride0_vec, 1786 data_ptr0, mipoff0, coords, offsets, 1787 colors0); 1788 } 1789 else { 1790 assert(img_filter == PIPE_TEX_FILTER_LINEAR); 1791 lp_build_sample_image_linear(bld, is_gather, size0, NULL, 1792 row_stride0_vec, img_stride0_vec, 1793 data_ptr0, mipoff0, coords, offsets, 1794 colors0); 1795 } 1796 1797 /* Store the first level's colors in the output variables */ 1798 for (chan = 0; chan < 4; chan++) { 1799 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 1800 } 1801 1802 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1803 struct lp_build_if_state if_ctx; 1804 LLVMValueRef need_lerp; 1805 1806 /* need_lerp = lod_fpart > 0 */ 1807 if (bld->num_lods == 1) { 1808 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, 1809 lod_fpart, bld->lodf_bld.zero, 1810 "need_lerp"); 1811 } 1812 else { 1813 /* 1814 * We'll do mip filtering if any of the quads (or individual 1815 * pixel in case of per-pixel lod) need it. 1816 * It might be better to split the vectors here and only fetch/filter 1817 * quads which need it (if there's one lod per quad). 1818 */ 1819 need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type, 1820 PIPE_FUNC_GREATER, 1821 lod_fpart, bld->lodf_bld.zero); 1822 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp); 1823 lp_build_name(need_lerp, "need_lerp"); 1824 } 1825 1826 lp_build_if(&if_ctx, bld->gallivm, need_lerp); 1827 { 1828 /* 1829 * We unfortunately need to clamp lod_fpart here since we can get 1830 * negative values which would screw up filtering if not all 1831 * lod_fpart values have same sign. 1832 */ 1833 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart, 1834 bld->lodf_bld.zero); 1835 /* sample the second mipmap level */ 1836 lp_build_mipmap_level_sizes(bld, ilevel1, 1837 &size1, 1838 &row_stride1_vec, &img_stride1_vec); 1839 if (bld->num_mips == 1) { 1840 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); 1841 } 1842 else { 1843 data_ptr1 = bld->base_ptr; 1844 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); 1845 } 1846 if (img_filter == PIPE_TEX_FILTER_NEAREST) { 1847 lp_build_sample_image_nearest(bld, size1, 1848 row_stride1_vec, img_stride1_vec, 1849 data_ptr1, mipoff1, coords, offsets, 1850 colors1); 1851 } 1852 else { 1853 lp_build_sample_image_linear(bld, FALSE, size1, NULL, 1854 row_stride1_vec, img_stride1_vec, 1855 data_ptr1, mipoff1, coords, offsets, 1856 colors1); 1857 } 1858 1859 /* interpolate samples from the two mipmap levels */ 1860 1861 if (bld->num_lods != bld->coord_type.length) 1862 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, 1863 bld->lodf_bld.type, 1864 bld->texel_bld.type, 1865 lod_fpart); 1866 1867 for (chan = 0; chan < 4; chan++) { 1868 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, 1869 colors0[chan], colors1[chan], 1870 0); 1871 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 1872 } 1873 } 1874 lp_build_endif(&if_ctx); 1875 } 1876 } 1877 1878 1879 /** 1880 * Sample the texture/mipmap using given mip filter, and using 1881 * both nearest and linear filtering at the same time depending 1882 * on linear_mask. 1883 * lod can be per quad but linear_mask is always per pixel. 1884 * ilevel0 and ilevel1 indicate the two mipmap levels to sample 1885 * from (vectors or scalars). 1886 * If we're using nearest miplevel sampling the '1' values will be null/unused. 1887 */ 1888 static void 1889 lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, 1890 LLVMValueRef linear_mask, 1891 unsigned mip_filter, 1892 const LLVMValueRef *coords, 1893 const LLVMValueRef *offsets, 1894 LLVMValueRef ilevel0, 1895 LLVMValueRef ilevel1, 1896 LLVMValueRef lod_fpart, 1897 LLVMValueRef lod_positive, 1898 LLVMValueRef *colors_out) 1899 { 1900 LLVMBuilderRef builder = bld->gallivm->builder; 1901 LLVMValueRef size0 = NULL; 1902 LLVMValueRef size1 = NULL; 1903 LLVMValueRef row_stride0_vec = NULL; 1904 LLVMValueRef row_stride1_vec = NULL; 1905 LLVMValueRef img_stride0_vec = NULL; 1906 LLVMValueRef img_stride1_vec = NULL; 1907 LLVMValueRef data_ptr0 = NULL; 1908 LLVMValueRef data_ptr1 = NULL; 1909 LLVMValueRef mipoff0 = NULL; 1910 LLVMValueRef mipoff1 = NULL; 1911 LLVMValueRef colors0[4], colors1[4]; 1912 unsigned chan; 1913 1914 /* sample the first mipmap level */ 1915 lp_build_mipmap_level_sizes(bld, ilevel0, 1916 &size0, 1917 &row_stride0_vec, &img_stride0_vec); 1918 if (bld->num_mips == 1) { 1919 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); 1920 } 1921 else { 1922 /* This path should work for num_lods 1 too but slightly less efficient */ 1923 data_ptr0 = bld->base_ptr; 1924 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); 1925 } 1926 1927 lp_build_sample_image_linear(bld, FALSE, size0, linear_mask, 1928 row_stride0_vec, img_stride0_vec, 1929 data_ptr0, mipoff0, coords, offsets, 1930 colors0); 1931 1932 /* Store the first level's colors in the output variables */ 1933 for (chan = 0; chan < 4; chan++) { 1934 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 1935 } 1936 1937 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1938 struct lp_build_if_state if_ctx; 1939 LLVMValueRef need_lerp; 1940 1941 /* 1942 * We'll do mip filtering if any of the quads (or individual 1943 * pixel in case of per-pixel lod) need it. 1944 * Note using lod_positive here not lod_fpart since it may be the same 1945 * condition as that used in the outer "if" in the caller hence llvm 1946 * should be able to merge the branches in this case. 1947 */ 1948 need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive); 1949 lp_build_name(need_lerp, "need_lerp"); 1950 1951 lp_build_if(&if_ctx, bld->gallivm, need_lerp); 1952 { 1953 /* 1954 * We unfortunately need to clamp lod_fpart here since we can get 1955 * negative values which would screw up filtering if not all 1956 * lod_fpart values have same sign. 1957 */ 1958 lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart, 1959 bld->lodf_bld.zero); 1960 /* sample the second mipmap level */ 1961 lp_build_mipmap_level_sizes(bld, ilevel1, 1962 &size1, 1963 &row_stride1_vec, &img_stride1_vec); 1964 if (bld->num_mips == 1) { 1965 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); 1966 } 1967 else { 1968 data_ptr1 = bld->base_ptr; 1969 mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); 1970 } 1971 1972 lp_build_sample_image_linear(bld, FALSE, size1, linear_mask, 1973 row_stride1_vec, img_stride1_vec, 1974 data_ptr1, mipoff1, coords, offsets, 1975 colors1); 1976 1977 /* interpolate samples from the two mipmap levels */ 1978 1979 if (bld->num_lods != bld->coord_type.length) 1980 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, 1981 bld->lodf_bld.type, 1982 bld->texel_bld.type, 1983 lod_fpart); 1984 1985 for (chan = 0; chan < 4; chan++) { 1986 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, 1987 colors0[chan], colors1[chan], 1988 0); 1989 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 1990 } 1991 } 1992 lp_build_endif(&if_ctx); 1993 } 1994 } 1995 1996 1997 /** 1998 * Build (per-coord) layer value. 1999 * Either clamp layer to valid values or fill in optional out_of_bounds 2000 * value and just return value unclamped. 2001 */ 2002 static LLVMValueRef 2003 lp_build_layer_coord(struct lp_build_sample_context *bld, 2004 unsigned texture_unit, 2005 boolean is_cube_array, 2006 LLVMValueRef layer, 2007 LLVMValueRef *out_of_bounds) 2008 { 2009 LLVMValueRef num_layers; 2010 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 2011 2012 num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm, 2013 bld->context_ptr, texture_unit); 2014 2015 if (out_of_bounds) { 2016 LLVMValueRef out1, out; 2017 assert(!is_cube_array); 2018 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers); 2019 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero); 2020 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers); 2021 *out_of_bounds = lp_build_or(int_coord_bld, out, out1); 2022 return layer; 2023 } 2024 else { 2025 LLVMValueRef maxlayer; 2026 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) : 2027 bld->int_bld.one; 2028 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s); 2029 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer); 2030 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer); 2031 } 2032 } 2033 2034 2035 /** 2036 * Calculate cube face, lod, mip levels. 2037 */ 2038 static void 2039 lp_build_sample_common(struct lp_build_sample_context *bld, 2040 boolean is_lodq, 2041 unsigned texture_index, 2042 unsigned sampler_index, 2043 LLVMValueRef *coords, 2044 const struct lp_derivatives *derivs, /* optional */ 2045 LLVMValueRef lod_bias, /* optional */ 2046 LLVMValueRef explicit_lod, /* optional */ 2047 LLVMValueRef *lod_pos_or_zero, 2048 LLVMValueRef *lod, 2049 LLVMValueRef *lod_fpart, 2050 LLVMValueRef *ilevel0, 2051 LLVMValueRef *ilevel1) 2052 { 2053 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter; 2054 const unsigned min_filter = bld->static_sampler_state->min_img_filter; 2055 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter; 2056 const unsigned target = bld->static_texture_state->target; 2057 LLVMValueRef first_level, cube_rho = NULL; 2058 LLVMValueRef lod_ipart = NULL; 2059 struct lp_derivatives cube_derivs; 2060 2061 /* 2062 printf("%s mip %d min %d mag %d\n", __FUNCTION__, 2063 mip_filter, min_filter, mag_filter); 2064 */ 2065 2066 /* 2067 * Choose cube face, recompute texcoords for the chosen face and 2068 * compute rho here too (as it requires transform of derivatives). 2069 */ 2070 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) { 2071 boolean need_derivs; 2072 need_derivs = ((min_filter != mag_filter || 2073 mip_filter != PIPE_TEX_MIPFILTER_NONE) && 2074 !bld->static_sampler_state->min_max_lod_equal && 2075 !explicit_lod); 2076 lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs); 2077 derivs = &cube_derivs; 2078 if (target == PIPE_TEXTURE_CUBE_ARRAY) { 2079 /* calculate cube layer coord now */ 2080 LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]); 2081 LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6); 2082 layer = lp_build_mul(&bld->int_coord_bld, layer, six); 2083 coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL); 2084 /* because of seamless filtering can't add it to face (coords[2]) here. */ 2085 } 2086 } 2087 else if (target == PIPE_TEXTURE_1D_ARRAY || 2088 target == PIPE_TEXTURE_2D_ARRAY) { 2089 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]); 2090 coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL); 2091 } 2092 2093 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) { 2094 /* 2095 * Clamp p coords to [0,1] for fixed function depth texture format here. 2096 * Technically this is not entirely correct for unorm depth as the ref value 2097 * should be converted to the depth format (quantization!) and comparison 2098 * then done in texture format. This would actually help performance (since 2099 * only need to do it once and could save the per-sample conversion of texels 2100 * to floats instead), but it would need more messy code (would need to push 2101 * at least some bits down to actual fetch so conversion could be skipped, 2102 * and would have ugly interaction with border color, would need to convert 2103 * border color to that format too or do some other tricks to make it work). 2104 */ 2105 const struct util_format_description *format_desc = bld->format_desc; 2106 unsigned chan_type; 2107 /* not entirely sure we couldn't end up with non-valid swizzle here */ 2108 chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ? 2109 format_desc->channel[format_desc->swizzle[0]].type : 2110 UTIL_FORMAT_TYPE_FLOAT; 2111 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) { 2112 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4], 2113 bld->coord_bld.zero, bld->coord_bld.one); 2114 } 2115 } 2116 2117 /* 2118 * Compute the level of detail (float). 2119 */ 2120 if (min_filter != mag_filter || 2121 mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) { 2122 /* Need to compute lod either to choose mipmap levels or to 2123 * distinguish between minification/magnification with one mipmap level. 2124 */ 2125 lp_build_lod_selector(bld, is_lodq, texture_index, sampler_index, 2126 coords[0], coords[1], coords[2], cube_rho, 2127 derivs, lod_bias, explicit_lod, 2128 mip_filter, lod, 2129 &lod_ipart, lod_fpart, lod_pos_or_zero); 2130 if (is_lodq) { 2131 LLVMValueRef last_level; 2132 last_level = bld->dynamic_state->last_level(bld->dynamic_state, 2133 bld->gallivm, 2134 bld->context_ptr, 2135 texture_index); 2136 first_level = bld->dynamic_state->first_level(bld->dynamic_state, 2137 bld->gallivm, 2138 bld->context_ptr, 2139 texture_index); 2140 last_level = lp_build_sub(&bld->int_bld, last_level, first_level); 2141 last_level = lp_build_int_to_float(&bld->float_bld, last_level); 2142 last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level); 2143 2144 switch (mip_filter) { 2145 case PIPE_TEX_MIPFILTER_NONE: 2146 *lod_fpart = bld->lodf_bld.zero; 2147 break; 2148 case PIPE_TEX_MIPFILTER_NEAREST: 2149 *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart); 2150 /* fallthrough */ 2151 case PIPE_TEX_MIPFILTER_LINEAR: 2152 *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart, 2153 bld->lodf_bld.zero, last_level); 2154 break; 2155 } 2156 return; 2157 } 2158 2159 } else { 2160 lod_ipart = bld->lodi_bld.zero; 2161 *lod_pos_or_zero = bld->lodi_bld.zero; 2162 } 2163 2164 if (bld->num_lods != bld->num_mips) { 2165 /* only makes sense if there's just a single mip level */ 2166 assert(bld->num_mips == 1); 2167 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1); 2168 } 2169 2170 /* 2171 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 2172 */ 2173 switch (mip_filter) { 2174 default: 2175 assert(0 && "bad mip_filter value in lp_build_sample_soa()"); 2176 /* fall-through */ 2177 case PIPE_TEX_MIPFILTER_NONE: 2178 /* always use mip level 0 */ 2179 first_level = bld->dynamic_state->first_level(bld->dynamic_state, 2180 bld->gallivm, bld->context_ptr, 2181 texture_index); 2182 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level); 2183 *ilevel0 = first_level; 2184 break; 2185 case PIPE_TEX_MIPFILTER_NEAREST: 2186 assert(lod_ipart); 2187 lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL); 2188 break; 2189 case PIPE_TEX_MIPFILTER_LINEAR: 2190 assert(lod_ipart); 2191 assert(*lod_fpart); 2192 lp_build_linear_mip_levels(bld, texture_index, 2193 lod_ipart, lod_fpart, 2194 ilevel0, ilevel1); 2195 break; 2196 } 2197 } 2198 2199 static void 2200 lp_build_clamp_border_color(struct lp_build_sample_context *bld, 2201 unsigned sampler_unit) 2202 { 2203 struct gallivm_state *gallivm = bld->gallivm; 2204 LLVMBuilderRef builder = gallivm->builder; 2205 LLVMValueRef border_color_ptr = 2206 bld->dynamic_state->border_color(bld->dynamic_state, gallivm, 2207 bld->context_ptr, sampler_unit); 2208 LLVMValueRef border_color; 2209 const struct util_format_description *format_desc = bld->format_desc; 2210 struct lp_type vec4_type = bld->texel_type; 2211 struct lp_build_context vec4_bld; 2212 LLVMValueRef min_clamp = NULL; 2213 LLVMValueRef max_clamp = NULL; 2214 2215 /* 2216 * For normalized format need to clamp border color (technically 2217 * probably should also quantize the data). Really sucks doing this 2218 * here but can't avoid at least for now since this is part of 2219 * sampler state and texture format is part of sampler_view state. 2220 * GL expects also expects clamping for uint/sint formats too so 2221 * do that as well (d3d10 can't end up here with uint/sint since it 2222 * only supports them with ld). 2223 */ 2224 vec4_type.length = 4; 2225 lp_build_context_init(&vec4_bld, gallivm, vec4_type); 2226 2227 /* 2228 * Vectorized clamping of border color. Loading is a bit of a hack since 2229 * we just cast the pointer to float array to pointer to vec4 2230 * (int or float). 2231 */ 2232 border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr, 2233 lp_build_const_int32(gallivm, 0)); 2234 border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr, 2235 LLVMPointerType(vec4_bld.vec_type, 0), ""); 2236 border_color = LLVMBuildLoad(builder, border_color_ptr, ""); 2237 /* we don't have aligned type in the dynamic state unfortunately */ 2238 LLVMSetAlignment(border_color, 4); 2239 2240 /* 2241 * Instead of having some incredibly complex logic which will try to figure out 2242 * clamping necessary for each channel, simply use the first channel, and treat 2243 * mixed signed/unsigned normalized formats specially. 2244 * (Mixed non-normalized, which wouldn't work at all here, do not exist for a 2245 * good reason.) 2246 */ 2247 if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { 2248 int chan; 2249 /* d/s needs special handling because both present means just sampling depth */ 2250 if (util_format_is_depth_and_stencil(format_desc->format)) { 2251 chan = format_desc->swizzle[0]; 2252 } 2253 else { 2254 chan = util_format_get_first_non_void_channel(format_desc->format); 2255 } 2256 if (chan >= 0 && chan <= PIPE_SWIZZLE_W) { 2257 unsigned chan_type = format_desc->channel[chan].type; 2258 unsigned chan_norm = format_desc->channel[chan].normalized; 2259 unsigned chan_pure = format_desc->channel[chan].pure_integer; 2260 if (chan_type == UTIL_FORMAT_TYPE_SIGNED) { 2261 if (chan_norm) { 2262 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); 2263 max_clamp = vec4_bld.one; 2264 } 2265 else if (chan_pure) { 2266 /* 2267 * Border color was stored as int, hence need min/max clamp 2268 * only if chan has less than 32 bits.. 2269 */ 2270 unsigned chan_size = format_desc->channel[chan].size; 2271 if (chan_size < 32) { 2272 min_clamp = lp_build_const_int_vec(gallivm, vec4_type, 2273 0 - (1 << (chan_size - 1))); 2274 max_clamp = lp_build_const_int_vec(gallivm, vec4_type, 2275 (1 << (chan_size - 1)) - 1); 2276 } 2277 } 2278 /* TODO: no idea about non-pure, non-normalized! */ 2279 } 2280 else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) { 2281 if (chan_norm) { 2282 min_clamp = vec4_bld.zero; 2283 max_clamp = vec4_bld.one; 2284 } 2285 /* 2286 * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24 2287 * we use Z32_FLOAT_S8X24 to imply sampling depth component 2288 * and ignoring stencil, which will blow up here if we try to 2289 * do a uint clamp in a float texel build... 2290 * And even if we had that format, mesa st also thinks using z24s8 2291 * means depth sampling ignoring stencil. 2292 */ 2293 else if (chan_pure) { 2294 /* 2295 * Border color was stored as uint, hence never need min 2296 * clamp, and only need max clamp if chan has less than 32 bits. 2297 */ 2298 unsigned chan_size = format_desc->channel[chan].size; 2299 if (chan_size < 32) { 2300 max_clamp = lp_build_const_int_vec(gallivm, vec4_type, 2301 (1 << chan_size) - 1); 2302 } 2303 /* TODO: no idea about non-pure, non-normalized! */ 2304 } 2305 } 2306 else if (chan_type == UTIL_FORMAT_TYPE_FIXED) { 2307 /* TODO: I have no idea what clamp this would need if any! */ 2308 } 2309 } 2310 /* mixed plain formats (or different pure size) */ 2311 switch (format_desc->format) { 2312 case PIPE_FORMAT_B10G10R10A2_UINT: 2313 case PIPE_FORMAT_R10G10B10A2_UINT: 2314 { 2315 unsigned max10 = (1 << 10) - 1; 2316 max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10, 2317 max10, (1 << 2) - 1, NULL); 2318 } 2319 break; 2320 case PIPE_FORMAT_R10SG10SB10SA2U_NORM: 2321 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F, 2322 -1.0F, 0.0F, NULL); 2323 max_clamp = vec4_bld.one; 2324 break; 2325 case PIPE_FORMAT_R8SG8SB8UX8U_NORM: 2326 case PIPE_FORMAT_R5SG5SB6U_NORM: 2327 min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F, 2328 0.0F, 0.0F, NULL); 2329 max_clamp = vec4_bld.one; 2330 break; 2331 default: 2332 break; 2333 } 2334 } 2335 else { 2336 /* cannot figure this out from format description */ 2337 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 2338 /* s3tc formats are always unorm */ 2339 min_clamp = vec4_bld.zero; 2340 max_clamp = vec4_bld.one; 2341 } 2342 else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC || 2343 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) { 2344 switch (format_desc->format) { 2345 case PIPE_FORMAT_RGTC1_UNORM: 2346 case PIPE_FORMAT_RGTC2_UNORM: 2347 case PIPE_FORMAT_LATC1_UNORM: 2348 case PIPE_FORMAT_LATC2_UNORM: 2349 case PIPE_FORMAT_ETC1_RGB8: 2350 min_clamp = vec4_bld.zero; 2351 max_clamp = vec4_bld.one; 2352 break; 2353 case PIPE_FORMAT_RGTC1_SNORM: 2354 case PIPE_FORMAT_RGTC2_SNORM: 2355 case PIPE_FORMAT_LATC1_SNORM: 2356 case PIPE_FORMAT_LATC2_SNORM: 2357 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); 2358 max_clamp = vec4_bld.one; 2359 break; 2360 default: 2361 assert(0); 2362 break; 2363 } 2364 } 2365 /* 2366 * all others from subsampled/other group, though we don't care 2367 * about yuv (and should not have any from zs here) 2368 */ 2369 else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){ 2370 switch (format_desc->format) { 2371 case PIPE_FORMAT_R8G8_B8G8_UNORM: 2372 case PIPE_FORMAT_G8R8_G8B8_UNORM: 2373 case PIPE_FORMAT_G8R8_B8R8_UNORM: 2374 case PIPE_FORMAT_R8G8_R8B8_UNORM: 2375 case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */ 2376 min_clamp = vec4_bld.zero; 2377 max_clamp = vec4_bld.one; 2378 break; 2379 case PIPE_FORMAT_R8G8Bx_SNORM: 2380 min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); 2381 max_clamp = vec4_bld.one; 2382 break; 2383 /* 2384 * Note smallfloat formats usually don't need clamping 2385 * (they still have infinite range) however this is not 2386 * true for r11g11b10 and r9g9b9e5, which can't represent 2387 * negative numbers (and additionally r9g9b9e5 can't represent 2388 * very large numbers). d3d10 seems happy without clamping in 2389 * this case, but gl spec is pretty clear: "for floating 2390 * point and integer formats, border values are clamped to 2391 * the representable range of the format" so do that here. 2392 */ 2393 case PIPE_FORMAT_R11G11B10_FLOAT: 2394 min_clamp = vec4_bld.zero; 2395 break; 2396 case PIPE_FORMAT_R9G9B9E5_FLOAT: 2397 min_clamp = vec4_bld.zero; 2398 max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5); 2399 break; 2400 default: 2401 assert(0); 2402 break; 2403 } 2404 } 2405 } 2406 2407 if (min_clamp) { 2408 border_color = lp_build_max(&vec4_bld, border_color, min_clamp); 2409 } 2410 if (max_clamp) { 2411 border_color = lp_build_min(&vec4_bld, border_color, max_clamp); 2412 } 2413 2414 bld->border_color_clamped = border_color; 2415 } 2416 2417 2418 /** 2419 * General texture sampling codegen. 2420 * This function handles texture sampling for all texture targets (1D, 2421 * 2D, 3D, cube) and all filtering modes. 2422 */ 2423 static void 2424 lp_build_sample_general(struct lp_build_sample_context *bld, 2425 unsigned sampler_unit, 2426 boolean is_gather, 2427 const LLVMValueRef *coords, 2428 const LLVMValueRef *offsets, 2429 LLVMValueRef lod_positive, 2430 LLVMValueRef lod_fpart, 2431 LLVMValueRef ilevel0, 2432 LLVMValueRef ilevel1, 2433 LLVMValueRef *colors_out) 2434 { 2435 LLVMBuilderRef builder = bld->gallivm->builder; 2436 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state; 2437 const unsigned mip_filter = sampler_state->min_mip_filter; 2438 const unsigned min_filter = sampler_state->min_img_filter; 2439 const unsigned mag_filter = sampler_state->mag_img_filter; 2440 LLVMValueRef texels[4]; 2441 unsigned chan; 2442 2443 /* if we need border color, (potentially) clamp it now */ 2444 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s, 2445 min_filter, 2446 mag_filter) || 2447 (bld->dims > 1 && 2448 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t, 2449 min_filter, 2450 mag_filter)) || 2451 (bld->dims > 2 && 2452 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r, 2453 min_filter, 2454 mag_filter))) { 2455 lp_build_clamp_border_color(bld, sampler_unit); 2456 } 2457 2458 2459 /* 2460 * Get/interpolate texture colors. 2461 */ 2462 2463 for (chan = 0; chan < 4; ++chan) { 2464 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, ""); 2465 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]); 2466 } 2467 2468 if (min_filter == mag_filter) { 2469 /* no need to distinguish between minification and magnification */ 2470 lp_build_sample_mipmap(bld, min_filter, mip_filter, 2471 is_gather, 2472 coords, offsets, 2473 ilevel0, ilevel1, lod_fpart, 2474 texels); 2475 } 2476 else { 2477 /* 2478 * Could also get rid of the if-logic and always use mipmap_both, both 2479 * for the single lod and multi-lod case if nothing really uses this. 2480 */ 2481 if (bld->num_lods == 1) { 2482 /* Emit conditional to choose min image filter or mag image filter 2483 * depending on the lod being > 0 or <= 0, respectively. 2484 */ 2485 struct lp_build_if_state if_ctx; 2486 2487 lod_positive = LLVMBuildTrunc(builder, lod_positive, 2488 LLVMInt1TypeInContext(bld->gallivm->context), 2489 "lod_pos"); 2490 2491 lp_build_if(&if_ctx, bld->gallivm, lod_positive); 2492 { 2493 /* Use the minification filter */ 2494 lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE, 2495 coords, offsets, 2496 ilevel0, ilevel1, lod_fpart, 2497 texels); 2498 } 2499 lp_build_else(&if_ctx); 2500 { 2501 /* Use the magnification filter */ 2502 lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE, 2503 FALSE, 2504 coords, offsets, 2505 ilevel0, NULL, NULL, 2506 texels); 2507 } 2508 lp_build_endif(&if_ctx); 2509 } 2510 else { 2511 LLVMValueRef need_linear, linear_mask; 2512 unsigned mip_filter_for_nearest; 2513 struct lp_build_if_state if_ctx; 2514 2515 if (min_filter == PIPE_TEX_FILTER_LINEAR) { 2516 linear_mask = lod_positive; 2517 mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE; 2518 } 2519 else { 2520 linear_mask = lp_build_not(&bld->lodi_bld, lod_positive); 2521 mip_filter_for_nearest = mip_filter; 2522 } 2523 need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, 2524 linear_mask); 2525 lp_build_name(need_linear, "need_linear"); 2526 2527 if (bld->num_lods != bld->coord_type.length) { 2528 linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, 2529 bld->lodi_type, 2530 bld->int_coord_type, 2531 linear_mask); 2532 } 2533 2534 lp_build_if(&if_ctx, bld->gallivm, need_linear); 2535 { 2536 /* 2537 * Do sampling with both filters simultaneously. This means using 2538 * a linear filter and doing some tricks (with weights) for the pixels 2539 * which need nearest filter. 2540 * Note that it's probably rare some pixels need nearest and some 2541 * linear filter but the fixups required for the nearest pixels 2542 * aren't all that complicated so just always run a combined path 2543 * if at least some pixels require linear. 2544 */ 2545 lp_build_sample_mipmap_both(bld, linear_mask, mip_filter, 2546 coords, offsets, 2547 ilevel0, ilevel1, 2548 lod_fpart, lod_positive, 2549 texels); 2550 } 2551 lp_build_else(&if_ctx); 2552 { 2553 /* 2554 * All pixels require just nearest filtering, which is way 2555 * cheaper than linear, hence do a separate path for that. 2556 */ 2557 lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST, 2558 mip_filter_for_nearest, FALSE, 2559 coords, offsets, 2560 ilevel0, ilevel1, lod_fpart, 2561 texels); 2562 } 2563 lp_build_endif(&if_ctx); 2564 } 2565 } 2566 2567 for (chan = 0; chan < 4; ++chan) { 2568 colors_out[chan] = LLVMBuildLoad(builder, texels[chan], ""); 2569 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]); 2570 } 2571 } 2572 2573 2574 /** 2575 * Texel fetch function. 2576 * In contrast to general sampling there is no filtering, no coord minification, 2577 * lod (if any) is always explicit uint, coords are uints (in terms of texel units) 2578 * directly to be applied to the selected mip level (after adding texel offsets). 2579 * This function handles texel fetch for all targets where texel fetch is supported 2580 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too). 2581 */ 2582 static void 2583 lp_build_fetch_texel(struct lp_build_sample_context *bld, 2584 unsigned texture_unit, 2585 const LLVMValueRef *coords, 2586 LLVMValueRef explicit_lod, 2587 const LLVMValueRef *offsets, 2588 LLVMValueRef *colors_out) 2589 { 2590 struct lp_build_context *perquadi_bld = &bld->lodi_bld; 2591 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 2592 unsigned dims = bld->dims, chan; 2593 unsigned target = bld->static_texture_state->target; 2594 boolean out_of_bound_ret_zero = TRUE; 2595 LLVMValueRef size, ilevel; 2596 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL; 2597 LLVMValueRef x = coords[0], y = coords[1], z = coords[2]; 2598 LLVMValueRef width, height, depth, i, j; 2599 LLVMValueRef offset, out_of_bounds, out1; 2600 2601 out_of_bounds = int_coord_bld->zero; 2602 2603 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) { 2604 if (bld->num_mips != int_coord_bld->type.length) { 2605 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type, 2606 perquadi_bld->type, explicit_lod, 0); 2607 } 2608 else { 2609 ilevel = explicit_lod; 2610 } 2611 lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel, 2612 out_of_bound_ret_zero ? &out_of_bounds : NULL); 2613 } 2614 else { 2615 assert(bld->num_mips == 1); 2616 if (bld->static_texture_state->target != PIPE_BUFFER) { 2617 ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, 2618 bld->context_ptr, texture_unit); 2619 } 2620 else { 2621 ilevel = lp_build_const_int32(bld->gallivm, 0); 2622 } 2623 } 2624 lp_build_mipmap_level_sizes(bld, ilevel, 2625 &size, 2626 &row_stride_vec, &img_stride_vec); 2627 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type, 2628 size, &width, &height, &depth); 2629 2630 if (target == PIPE_TEXTURE_1D_ARRAY || 2631 target == PIPE_TEXTURE_2D_ARRAY) { 2632 if (out_of_bound_ret_zero) { 2633 z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1); 2634 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 2635 } 2636 else { 2637 z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL); 2638 } 2639 } 2640 2641 /* This is a lot like border sampling */ 2642 if (offsets[0]) { 2643 /* 2644 * coords are really unsigned, offsets are signed, but I don't think 2645 * exceeding 31 bits is possible 2646 */ 2647 x = lp_build_add(int_coord_bld, x, offsets[0]); 2648 } 2649 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); 2650 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 2651 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); 2652 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 2653 2654 if (dims >= 2) { 2655 if (offsets[1]) { 2656 y = lp_build_add(int_coord_bld, y, offsets[1]); 2657 } 2658 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); 2659 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 2660 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); 2661 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 2662 2663 if (dims >= 3) { 2664 if (offsets[2]) { 2665 z = lp_build_add(int_coord_bld, z, offsets[2]); 2666 } 2667 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); 2668 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 2669 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); 2670 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); 2671 } 2672 } 2673 2674 lp_build_sample_offset(int_coord_bld, 2675 bld->format_desc, 2676 x, y, z, row_stride_vec, img_stride_vec, 2677 &offset, &i, &j); 2678 2679 if (bld->static_texture_state->target != PIPE_BUFFER) { 2680 offset = lp_build_add(int_coord_bld, offset, 2681 lp_build_get_mip_offsets(bld, ilevel)); 2682 } 2683 2684 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds); 2685 2686 lp_build_fetch_rgba_soa(bld->gallivm, 2687 bld->format_desc, 2688 bld->texel_type, TRUE, 2689 bld->base_ptr, offset, 2690 i, j, 2691 bld->cache, 2692 colors_out); 2693 2694 if (out_of_bound_ret_zero) { 2695 /* 2696 * Only needed for ARB_robust_buffer_access_behavior and d3d10. 2697 * Could use min/max above instead of out-of-bounds comparisons 2698 * if we don't care about the result returned for out-of-bounds. 2699 */ 2700 for (chan = 0; chan < 4; chan++) { 2701 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds, 2702 bld->texel_bld.zero, colors_out[chan]); 2703 } 2704 } 2705 } 2706 2707 2708 /** 2709 * Just set texels to white instead of actually sampling the texture. 2710 * For debugging. 2711 */ 2712 void 2713 lp_build_sample_nop(struct gallivm_state *gallivm, 2714 struct lp_type type, 2715 const LLVMValueRef *coords, 2716 LLVMValueRef texel_out[4]) 2717 { 2718 LLVMValueRef one = lp_build_one(gallivm, type); 2719 unsigned chan; 2720 2721 for (chan = 0; chan < 4; chan++) { 2722 texel_out[chan] = one; 2723 } 2724 } 2725 2726 2727 /** 2728 * Build the actual texture sampling code. 2729 * 'texel' will return a vector of four LLVMValueRefs corresponding to 2730 * R, G, B, A. 2731 * \param type vector float type to use for coords, etc. 2732 * \param sample_key 2733 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y 2734 */ 2735 static void 2736 lp_build_sample_soa_code(struct gallivm_state *gallivm, 2737 const struct lp_static_texture_state *static_texture_state, 2738 const struct lp_static_sampler_state *static_sampler_state, 2739 struct lp_sampler_dynamic_state *dynamic_state, 2740 struct lp_type type, 2741 unsigned sample_key, 2742 unsigned texture_index, 2743 unsigned sampler_index, 2744 LLVMValueRef context_ptr, 2745 LLVMValueRef thread_data_ptr, 2746 const LLVMValueRef *coords, 2747 const LLVMValueRef *offsets, 2748 const struct lp_derivatives *derivs, /* optional */ 2749 LLVMValueRef lod, /* optional */ 2750 LLVMValueRef texel_out[4]) 2751 { 2752 unsigned target = static_texture_state->target; 2753 unsigned dims = texture_dims(target); 2754 unsigned num_quads = type.length / 4; 2755 unsigned mip_filter, min_img_filter, mag_img_filter, i; 2756 struct lp_build_sample_context bld; 2757 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state; 2758 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 2759 LLVMBuilderRef builder = gallivm->builder; 2760 LLVMValueRef tex_width, newcoords[5]; 2761 enum lp_sampler_lod_property lod_property; 2762 enum lp_sampler_lod_control lod_control; 2763 enum lp_sampler_op_type op_type; 2764 LLVMValueRef lod_bias = NULL; 2765 LLVMValueRef explicit_lod = NULL; 2766 boolean op_is_tex, op_is_lodq, op_is_gather; 2767 2768 if (0) { 2769 enum pipe_format fmt = static_texture_state->format; 2770 debug_printf("Sample from %s\n", util_format_name(fmt)); 2771 } 2772 2773 lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >> 2774 LP_SAMPLER_LOD_PROPERTY_SHIFT; 2775 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> 2776 LP_SAMPLER_LOD_CONTROL_SHIFT; 2777 op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> 2778 LP_SAMPLER_OP_TYPE_SHIFT; 2779 2780 op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE; 2781 op_is_lodq = op_type == LP_SAMPLER_OP_LODQ; 2782 op_is_gather = op_type == LP_SAMPLER_OP_GATHER; 2783 2784 if (lod_control == LP_SAMPLER_LOD_BIAS) { 2785 lod_bias = lod; 2786 assert(lod); 2787 assert(derivs == NULL); 2788 } 2789 else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) { 2790 explicit_lod = lod; 2791 assert(lod); 2792 assert(derivs == NULL); 2793 } 2794 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { 2795 assert(derivs); 2796 assert(lod == NULL); 2797 } 2798 else { 2799 assert(derivs == NULL); 2800 assert(lod == NULL); 2801 } 2802 2803 if (static_texture_state->format == PIPE_FORMAT_NONE) { 2804 /* 2805 * If there's nothing bound, format is NONE, and we must return 2806 * all zero as mandated by d3d10 in this case. 2807 */ 2808 unsigned chan; 2809 LLVMValueRef zero = lp_build_zero(gallivm, type); 2810 for (chan = 0; chan < 4; chan++) { 2811 texel_out[chan] = zero; 2812 } 2813 return; 2814 } 2815 2816 assert(type.floating); 2817 2818 /* Setup our build context */ 2819 memset(&bld, 0, sizeof bld); 2820 bld.gallivm = gallivm; 2821 bld.context_ptr = context_ptr; 2822 bld.static_sampler_state = &derived_sampler_state; 2823 bld.static_texture_state = static_texture_state; 2824 bld.dynamic_state = dynamic_state; 2825 bld.format_desc = util_format_description(static_texture_state->format); 2826 bld.dims = dims; 2827 2828 if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD || op_is_lodq) { 2829 bld.no_quad_lod = TRUE; 2830 } 2831 if (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX || op_is_lodq) { 2832 bld.no_rho_approx = TRUE; 2833 } 2834 if (gallivm_debug & GALLIVM_DEBUG_NO_BRILINEAR || op_is_lodq) { 2835 bld.no_brilinear = TRUE; 2836 } 2837 2838 bld.vector_width = lp_type_width(type); 2839 2840 bld.float_type = lp_type_float(32); 2841 bld.int_type = lp_type_int(32); 2842 bld.coord_type = type; 2843 bld.int_coord_type = lp_int_type(type); 2844 bld.float_size_in_type = lp_type_float(32); 2845 bld.float_size_in_type.length = dims > 1 ? 4 : 1; 2846 bld.int_size_in_type = lp_int_type(bld.float_size_in_type); 2847 bld.texel_type = type; 2848 2849 /* always using the first channel hopefully should be safe, 2850 * if not things WILL break in other places anyway. 2851 */ 2852 if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && 2853 bld.format_desc->channel[0].pure_integer) { 2854 if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { 2855 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length); 2856 } 2857 else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2858 bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length); 2859 } 2860 } 2861 else if (util_format_has_stencil(bld.format_desc) && 2862 !util_format_has_depth(bld.format_desc)) { 2863 /* for stencil only formats, sample stencil (uint) */ 2864 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length); 2865 } 2866 2867 if (!static_texture_state->level_zero_only || 2868 !static_sampler_state->max_lod_pos || op_is_lodq) { 2869 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter; 2870 } else { 2871 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 2872 } 2873 if (op_is_gather) { 2874 /* 2875 * gather4 is exactly like GL_LINEAR filtering but in the end skipping 2876 * the actual filtering. Using mostly the same paths, so cube face 2877 * selection, coord wrapping etc. all naturally uses the same code. 2878 */ 2879 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 2880 derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR; 2881 derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR; 2882 } 2883 mip_filter = derived_sampler_state.min_mip_filter; 2884 2885 if (0) { 2886 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter); 2887 } 2888 2889 if (static_texture_state->target == PIPE_TEXTURE_CUBE || 2890 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) 2891 { 2892 /* 2893 * Seamless filtering ignores wrap modes. 2894 * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for 2895 * bilinear it's not correct but way better than using for instance repeat. 2896 * Note we even set this for non-seamless. Technically GL allows any wrap 2897 * mode, which made sense when supporting true borders (can get seamless 2898 * effect with border and CLAMP_TO_BORDER), but gallium doesn't support 2899 * borders and d3d9 requires wrap modes to be ignored and it's a pain to fix 2900 * up the sampler state (as it makes it texture dependent). 2901 */ 2902 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 2903 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 2904 } 2905 /* 2906 * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest, 2907 * so AoS path could be used. Not sure it's worth the trouble... 2908 */ 2909 2910 min_img_filter = derived_sampler_state.min_img_filter; 2911 mag_img_filter = derived_sampler_state.mag_img_filter; 2912 2913 2914 /* 2915 * This is all a bit complicated different paths are chosen for performance 2916 * reasons. 2917 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for 2918 * everything (the last two options are equivalent for 4-wide case). 2919 * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad 2920 * lod is calculated then the lod value extracted afterwards so making this 2921 * case basically the same as far as lod handling is concerned for the 2922 * further sample/filter code as the 1 lod for everything case. 2923 * Different lod handling mostly shows up when building mipmap sizes 2924 * (lp_build_mipmap_level_sizes() and friends) and also in filtering 2925 * (getting the fractional part of the lod to the right texels). 2926 */ 2927 2928 /* 2929 * There are other situations where at least the multiple int lods could be 2930 * avoided like min and max lod being equal. 2931 */ 2932 bld.num_mips = bld.num_lods = 1; 2933 2934 if (bld.no_quad_lod && bld.no_rho_approx && 2935 ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex && 2936 (static_texture_state->target == PIPE_TEXTURE_CUBE || 2937 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) || 2938 op_is_lodq)) { 2939 /* 2940 * special case for using per-pixel lod even for implicit lod, 2941 * which is generally never required (ok by APIs) except to please 2942 * some (somewhat broken imho) tests (because per-pixel face selection 2943 * can cause derivatives to be different for pixels outside the primitive 2944 * due to the major axis division even if pre-project derivatives are 2945 * looking normal). 2946 * For lodq, we do it to simply avoid scalar pack / unpack (albeit for 2947 * cube maps we do indeed get per-pixel lod values). 2948 */ 2949 bld.num_mips = type.length; 2950 bld.num_lods = type.length; 2951 } 2952 else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT || 2953 (explicit_lod || lod_bias || derivs)) { 2954 if ((!op_is_tex && target != PIPE_BUFFER) || 2955 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 2956 bld.num_mips = type.length; 2957 bld.num_lods = type.length; 2958 } 2959 else if (op_is_tex && min_img_filter != mag_img_filter) { 2960 bld.num_mips = 1; 2961 bld.num_lods = type.length; 2962 } 2963 } 2964 /* TODO: for true scalar_lod should only use 1 lod value */ 2965 else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) || 2966 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 2967 bld.num_mips = num_quads; 2968 bld.num_lods = num_quads; 2969 } 2970 else if (op_is_tex && min_img_filter != mag_img_filter) { 2971 bld.num_mips = 1; 2972 bld.num_lods = num_quads; 2973 } 2974 2975 2976 bld.lodf_type = type; 2977 /* we want native vector size to be able to use our intrinsics */ 2978 if (bld.num_lods != type.length) { 2979 /* TODO: this currently always has to be per-quad or per-element */ 2980 bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1; 2981 } 2982 bld.lodi_type = lp_int_type(bld.lodf_type); 2983 bld.levelf_type = bld.lodf_type; 2984 if (bld.num_mips == 1) { 2985 bld.levelf_type.length = 1; 2986 } 2987 bld.leveli_type = lp_int_type(bld.levelf_type); 2988 bld.float_size_type = bld.float_size_in_type; 2989 /* Note: size vectors may not be native. They contain minified w/h/d/_ values, 2990 * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */ 2991 if (bld.num_mips > 1) { 2992 bld.float_size_type.length = bld.num_mips == type.length ? 2993 bld.num_mips * bld.float_size_in_type.length : 2994 type.length; 2995 } 2996 bld.int_size_type = lp_int_type(bld.float_size_type); 2997 2998 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type); 2999 lp_build_context_init(&bld.float_vec_bld, gallivm, type); 3000 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type); 3001 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type); 3002 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type); 3003 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type); 3004 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type); 3005 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type); 3006 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type); 3007 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type); 3008 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type); 3009 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type); 3010 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type); 3011 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type); 3012 3013 /* Get the dynamic state */ 3014 tex_width = dynamic_state->width(dynamic_state, gallivm, 3015 context_ptr, texture_index); 3016 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, 3017 context_ptr, texture_index); 3018 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, 3019 context_ptr, texture_index); 3020 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, 3021 context_ptr, texture_index); 3022 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, 3023 context_ptr, texture_index); 3024 /* Note that mip_offsets is an array[level] of offsets to texture images */ 3025 3026 if (dynamic_state->cache_ptr && thread_data_ptr) { 3027 bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm, 3028 thread_data_ptr, texture_index); 3029 } 3030 3031 /* width, height, depth as single int vector */ 3032 if (dims <= 1) { 3033 bld.int_size = tex_width; 3034 } 3035 else { 3036 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef, 3037 tex_width, 3038 LLVMConstInt(i32t, 0, 0), ""); 3039 if (dims >= 2) { 3040 LLVMValueRef tex_height = 3041 dynamic_state->height(dynamic_state, gallivm, 3042 context_ptr, texture_index); 3043 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, 3044 tex_height, 3045 LLVMConstInt(i32t, 1, 0), ""); 3046 if (dims >= 3) { 3047 LLVMValueRef tex_depth = 3048 dynamic_state->depth(dynamic_state, gallivm, context_ptr, 3049 texture_index); 3050 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, 3051 tex_depth, 3052 LLVMConstInt(i32t, 2, 0), ""); 3053 } 3054 } 3055 } 3056 3057 for (i = 0; i < 5; i++) { 3058 newcoords[i] = coords[i]; 3059 } 3060 3061 if (util_format_is_pure_integer(static_texture_state->format) && 3062 !util_format_has_depth(bld.format_desc) && op_is_tex && 3063 (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR || 3064 static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR || 3065 static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) { 3066 /* 3067 * Bail if impossible filtering is specified (the awkard additional 3068 * depth check is because it is legal in gallium to have things like S8Z24 3069 * here which would say it's pure int despite such formats should sample 3070 * the depth component). 3071 * In GL such filters make the texture incomplete, this makes it robust 3072 * against state trackers which set this up regardless (we'd crash in the 3073 * lerp later otherwise). 3074 * At least in some apis it may be legal to use such filters with lod 3075 * queries and/or gather (at least for gather d3d10 says only the wrap 3076 * bits are really used hence filter bits are likely simply ignored). 3077 * For fetch, we don't get valid samplers either way here. 3078 */ 3079 unsigned chan; 3080 LLVMValueRef zero = lp_build_zero(gallivm, type); 3081 for (chan = 0; chan < 4; chan++) { 3082 texel_out[chan] = zero; 3083 } 3084 return; 3085 } 3086 3087 if (0) { 3088 /* For debug: no-op texture sampling */ 3089 lp_build_sample_nop(gallivm, 3090 bld.texel_type, 3091 newcoords, 3092 texel_out); 3093 } 3094 3095 else if (op_type == LP_SAMPLER_OP_FETCH) { 3096 lp_build_fetch_texel(&bld, texture_index, newcoords, 3097 lod, offsets, 3098 texel_out); 3099 } 3100 3101 else { 3102 LLVMValueRef lod_fpart = NULL, lod_positive = NULL; 3103 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL; 3104 boolean use_aos; 3105 3106 use_aos = util_format_fits_8unorm(bld.format_desc) && 3107 op_is_tex && 3108 /* not sure this is strictly needed or simply impossible */ 3109 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE && 3110 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s); 3111 3112 use_aos &= bld.num_lods <= num_quads || 3113 derived_sampler_state.min_img_filter == 3114 derived_sampler_state.mag_img_filter; 3115 if (dims > 1) { 3116 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t); 3117 if (dims > 2) { 3118 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r); 3119 } 3120 } 3121 if ((static_texture_state->target == PIPE_TEXTURE_CUBE || 3122 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && 3123 derived_sampler_state.seamless_cube_map && 3124 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR || 3125 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) { 3126 /* theoretically possible with AoS filtering but not implemented (complex!) */ 3127 use_aos = 0; 3128 } 3129 3130 if ((gallivm_debug & GALLIVM_DEBUG_PERF) && 3131 !use_aos && util_format_fits_8unorm(bld.format_desc)) { 3132 debug_printf("%s: using floating point linear filtering for %s\n", 3133 __FUNCTION__, bld.format_desc->short_name); 3134 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d" 3135 " wraps %d wrapt %d wrapr %d\n", 3136 derived_sampler_state.min_img_filter, 3137 derived_sampler_state.mag_img_filter, 3138 derived_sampler_state.min_mip_filter, 3139 static_texture_state->target, 3140 derived_sampler_state.seamless_cube_map, 3141 derived_sampler_state.wrap_s, 3142 derived_sampler_state.wrap_t, 3143 derived_sampler_state.wrap_r); 3144 } 3145 3146 lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index, 3147 newcoords, 3148 derivs, lod_bias, explicit_lod, 3149 &lod_positive, &lod, &lod_fpart, 3150 &ilevel0, &ilevel1); 3151 3152 if (op_is_lodq) { 3153 texel_out[0] = lod_fpart; 3154 texel_out[1] = lod; 3155 texel_out[2] = texel_out[3] = bld.coord_bld.zero; 3156 return; 3157 } 3158 3159 if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 3160 /* The aos path doesn't do seamless filtering so simply add cube layer 3161 * to face now. 3162 */ 3163 newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]); 3164 } 3165 3166 /* 3167 * we only try 8-wide sampling with soa or if we have AVX2 3168 * as it appears to be a loss with just AVX) 3169 */ 3170 if (num_quads == 1 || !use_aos || 3171 (util_cpu_caps.has_avx2 && 3172 (bld.num_lods == 1 || 3173 derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) { 3174 if (use_aos) { 3175 /* do sampling/filtering with fixed pt arithmetic */ 3176 lp_build_sample_aos(&bld, sampler_index, 3177 newcoords[0], newcoords[1], 3178 newcoords[2], 3179 offsets, lod_positive, lod_fpart, 3180 ilevel0, ilevel1, 3181 texel_out); 3182 } 3183 3184 else { 3185 lp_build_sample_general(&bld, sampler_index, 3186 op_type == LP_SAMPLER_OP_GATHER, 3187 newcoords, offsets, 3188 lod_positive, lod_fpart, 3189 ilevel0, ilevel1, 3190 texel_out); 3191 } 3192 } 3193 else { 3194 unsigned j; 3195 struct lp_build_sample_context bld4; 3196 struct lp_type type4 = type; 3197 unsigned i; 3198 LLVMValueRef texelout4[4]; 3199 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16]; 3200 3201 type4.length = 4; 3202 3203 /* Setup our build context */ 3204 memset(&bld4, 0, sizeof bld4); 3205 bld4.no_quad_lod = bld.no_quad_lod; 3206 bld4.no_rho_approx = bld.no_rho_approx; 3207 bld4.no_brilinear = bld.no_brilinear; 3208 bld4.gallivm = bld.gallivm; 3209 bld4.context_ptr = bld.context_ptr; 3210 bld4.static_texture_state = bld.static_texture_state; 3211 bld4.static_sampler_state = bld.static_sampler_state; 3212 bld4.dynamic_state = bld.dynamic_state; 3213 bld4.format_desc = bld.format_desc; 3214 bld4.dims = bld.dims; 3215 bld4.row_stride_array = bld.row_stride_array; 3216 bld4.img_stride_array = bld.img_stride_array; 3217 bld4.base_ptr = bld.base_ptr; 3218 bld4.mip_offsets = bld.mip_offsets; 3219 bld4.int_size = bld.int_size; 3220 bld4.cache = bld.cache; 3221 3222 bld4.vector_width = lp_type_width(type4); 3223 3224 bld4.float_type = lp_type_float(32); 3225 bld4.int_type = lp_type_int(32); 3226 bld4.coord_type = type4; 3227 bld4.int_coord_type = lp_int_type(type4); 3228 bld4.float_size_in_type = lp_type_float(32); 3229 bld4.float_size_in_type.length = dims > 1 ? 4 : 1; 3230 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type); 3231 bld4.texel_type = bld.texel_type; 3232 bld4.texel_type.length = 4; 3233 3234 bld4.num_mips = bld4.num_lods = 1; 3235 if (bld4.no_quad_lod && bld4.no_rho_approx && 3236 (static_texture_state->target == PIPE_TEXTURE_CUBE || 3237 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && 3238 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 3239 bld4.num_mips = type4.length; 3240 bld4.num_lods = type4.length; 3241 } 3242 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT && 3243 (explicit_lod || lod_bias || derivs)) { 3244 if ((!op_is_tex && target != PIPE_BUFFER) || 3245 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 3246 bld4.num_mips = type4.length; 3247 bld4.num_lods = type4.length; 3248 } 3249 else if (op_is_tex && min_img_filter != mag_img_filter) { 3250 bld4.num_mips = 1; 3251 bld4.num_lods = type4.length; 3252 } 3253 } 3254 3255 /* we want native vector size to be able to use our intrinsics */ 3256 bld4.lodf_type = type4; 3257 if (bld4.num_lods != type4.length) { 3258 bld4.lodf_type.length = 1; 3259 } 3260 bld4.lodi_type = lp_int_type(bld4.lodf_type); 3261 bld4.levelf_type = type4; 3262 if (bld4.num_mips != type4.length) { 3263 bld4.levelf_type.length = 1; 3264 } 3265 bld4.leveli_type = lp_int_type(bld4.levelf_type); 3266 bld4.float_size_type = bld4.float_size_in_type; 3267 if (bld4.num_mips > 1) { 3268 bld4.float_size_type.length = bld4.num_mips == type4.length ? 3269 bld4.num_mips * bld4.float_size_in_type.length : 3270 type4.length; 3271 } 3272 bld4.int_size_type = lp_int_type(bld4.float_size_type); 3273 3274 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type); 3275 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4); 3276 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type); 3277 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type); 3278 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type); 3279 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type); 3280 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type); 3281 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type); 3282 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type); 3283 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type); 3284 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type); 3285 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type); 3286 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type); 3287 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type); 3288 3289 for (i = 0; i < num_quads; i++) { 3290 LLVMValueRef s4, t4, r4; 3291 LLVMValueRef lod_positive4, lod_fpart4 = NULL; 3292 LLVMValueRef ilevel04, ilevel14 = NULL; 3293 LLVMValueRef offsets4[4] = { NULL }; 3294 unsigned num_lods = bld4.num_lods; 3295 3296 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4); 3297 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4); 3298 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4); 3299 3300 if (offsets[0]) { 3301 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4); 3302 if (dims > 1) { 3303 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4); 3304 if (dims > 2) { 3305 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4); 3306 } 3307 } 3308 } 3309 lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods); 3310 ilevel04 = bld.num_mips == 1 ? ilevel0 : 3311 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods); 3312 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 3313 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods); 3314 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods); 3315 } 3316 3317 if (use_aos) { 3318 /* do sampling/filtering with fixed pt arithmetic */ 3319 lp_build_sample_aos(&bld4, sampler_index, 3320 s4, t4, r4, offsets4, 3321 lod_positive4, lod_fpart4, 3322 ilevel04, ilevel14, 3323 texelout4); 3324 } 3325 3326 else { 3327 /* this path is currently unreachable and hence might break easily... */ 3328 LLVMValueRef newcoords4[5]; 3329 newcoords4[0] = s4; 3330 newcoords4[1] = t4; 3331 newcoords4[2] = r4; 3332 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4); 3333 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4); 3334 3335 lp_build_sample_general(&bld4, sampler_index, 3336 op_type == LP_SAMPLER_OP_GATHER, 3337 newcoords4, offsets4, 3338 lod_positive4, lod_fpart4, 3339 ilevel04, ilevel14, 3340 texelout4); 3341 } 3342 for (j = 0; j < 4; j++) { 3343 texelouttmp[j][i] = texelout4[j]; 3344 } 3345 } 3346 3347 for (j = 0; j < 4; j++) { 3348 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads); 3349 } 3350 } 3351 } 3352 3353 if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) { 3354 apply_sampler_swizzle(&bld, texel_out); 3355 } 3356 3357 /* 3358 * texel type can be a (32bit) int/uint (for pure int formats only), 3359 * however we are expected to always return floats (storage is untyped). 3360 */ 3361 if (!bld.texel_type.floating) { 3362 unsigned chan; 3363 for (chan = 0; chan < 4; chan++) { 3364 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan], 3365 lp_build_vec_type(gallivm, type), ""); 3366 } 3367 } 3368 } 3369 3370 3371 #define USE_TEX_FUNC_CALL 1 3372 3373 #define LP_MAX_TEX_FUNC_ARGS 32 3374 3375 static inline void 3376 get_target_info(enum pipe_texture_target target, 3377 unsigned *num_coords, unsigned *num_derivs, 3378 unsigned *num_offsets, unsigned *layer) 3379 { 3380 unsigned dims = texture_dims(target); 3381 *num_coords = dims; 3382 *num_offsets = dims; 3383 *num_derivs = (target == PIPE_TEXTURE_CUBE || 3384 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims; 3385 *layer = has_layer_coord(target) ? 2: 0; 3386 if (target == PIPE_TEXTURE_CUBE_ARRAY) { 3387 /* 3388 * dims doesn't include r coord for cubes - this is handled 3389 * by layer instead, but need to fix up for cube arrays... 3390 */ 3391 *layer = 3; 3392 *num_coords = 3; 3393 } 3394 } 3395 3396 3397 /** 3398 * Generate the function body for a texture sampling function. 3399 */ 3400 static void 3401 lp_build_sample_gen_func(struct gallivm_state *gallivm, 3402 const struct lp_static_texture_state *static_texture_state, 3403 const struct lp_static_sampler_state *static_sampler_state, 3404 struct lp_sampler_dynamic_state *dynamic_state, 3405 struct lp_type type, 3406 unsigned texture_index, 3407 unsigned sampler_index, 3408 LLVMValueRef function, 3409 unsigned num_args, 3410 unsigned sample_key) 3411 { 3412 LLVMBuilderRef old_builder; 3413 LLVMBasicBlockRef block; 3414 LLVMValueRef coords[5]; 3415 LLVMValueRef offsets[3] = { NULL }; 3416 LLVMValueRef lod = NULL; 3417 LLVMValueRef context_ptr; 3418 LLVMValueRef thread_data_ptr = NULL; 3419 LLVMValueRef texel_out[4]; 3420 struct lp_derivatives derivs; 3421 struct lp_derivatives *deriv_ptr = NULL; 3422 unsigned num_param = 0; 3423 unsigned i, num_coords, num_derivs, num_offsets, layer; 3424 enum lp_sampler_lod_control lod_control; 3425 boolean need_cache = FALSE; 3426 3427 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> 3428 LP_SAMPLER_LOD_CONTROL_SHIFT; 3429 3430 get_target_info(static_texture_state->target, 3431 &num_coords, &num_derivs, &num_offsets, &layer); 3432 3433 if (dynamic_state->cache_ptr) { 3434 const struct util_format_description *format_desc; 3435 format_desc = util_format_description(static_texture_state->format); 3436 if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 3437 need_cache = TRUE; 3438 } 3439 } 3440 3441 /* "unpack" arguments */ 3442 context_ptr = LLVMGetParam(function, num_param++); 3443 if (need_cache) { 3444 thread_data_ptr = LLVMGetParam(function, num_param++); 3445 } 3446 for (i = 0; i < num_coords; i++) { 3447 coords[i] = LLVMGetParam(function, num_param++); 3448 } 3449 for (i = num_coords; i < 5; i++) { 3450 /* This is rather unfortunate... */ 3451 coords[i] = lp_build_undef(gallivm, type); 3452 } 3453 if (layer) { 3454 coords[layer] = LLVMGetParam(function, num_param++); 3455 } 3456 if (sample_key & LP_SAMPLER_SHADOW) { 3457 coords[4] = LLVMGetParam(function, num_param++); 3458 } 3459 if (sample_key & LP_SAMPLER_OFFSETS) { 3460 for (i = 0; i < num_offsets; i++) { 3461 offsets[i] = LLVMGetParam(function, num_param++); 3462 } 3463 } 3464 if (lod_control == LP_SAMPLER_LOD_BIAS || 3465 lod_control == LP_SAMPLER_LOD_EXPLICIT) { 3466 lod = LLVMGetParam(function, num_param++); 3467 } 3468 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { 3469 for (i = 0; i < num_derivs; i++) { 3470 derivs.ddx[i] = LLVMGetParam(function, num_param++); 3471 derivs.ddy[i] = LLVMGetParam(function, num_param++); 3472 } 3473 deriv_ptr = &derivs; 3474 } 3475 3476 assert(num_args == num_param); 3477 3478 /* 3479 * Function body 3480 */ 3481 3482 old_builder = gallivm->builder; 3483 block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); 3484 gallivm->builder = LLVMCreateBuilderInContext(gallivm->context); 3485 LLVMPositionBuilderAtEnd(gallivm->builder, block); 3486 3487 lp_build_sample_soa_code(gallivm, 3488 static_texture_state, 3489 static_sampler_state, 3490 dynamic_state, 3491 type, 3492 sample_key, 3493 texture_index, 3494 sampler_index, 3495 context_ptr, 3496 thread_data_ptr, 3497 coords, 3498 offsets, 3499 deriv_ptr, 3500 lod, 3501 texel_out); 3502 3503 LLVMBuildAggregateRet(gallivm->builder, texel_out, 4); 3504 3505 LLVMDisposeBuilder(gallivm->builder); 3506 gallivm->builder = old_builder; 3507 3508 gallivm_verify_function(gallivm, function); 3509 } 3510 3511 3512 /** 3513 * Call the matching function for texture sampling. 3514 * If there's no match, generate a new one. 3515 */ 3516 static void 3517 lp_build_sample_soa_func(struct gallivm_state *gallivm, 3518 const struct lp_static_texture_state *static_texture_state, 3519 const struct lp_static_sampler_state *static_sampler_state, 3520 struct lp_sampler_dynamic_state *dynamic_state, 3521 const struct lp_sampler_params *params) 3522 { 3523 LLVMBuilderRef builder = gallivm->builder; 3524 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent( 3525 LLVMGetInsertBlock(builder))); 3526 LLVMValueRef function, inst; 3527 LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS]; 3528 LLVMBasicBlockRef bb; 3529 LLVMValueRef tex_ret; 3530 unsigned num_args = 0; 3531 char func_name[64]; 3532 unsigned i, num_coords, num_derivs, num_offsets, layer; 3533 unsigned texture_index = params->texture_index; 3534 unsigned sampler_index = params->sampler_index; 3535 unsigned sample_key = params->sample_key; 3536 const LLVMValueRef *coords = params->coords; 3537 const LLVMValueRef *offsets = params->offsets; 3538 const struct lp_derivatives *derivs = params->derivs; 3539 enum lp_sampler_lod_control lod_control; 3540 boolean need_cache = FALSE; 3541 3542 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> 3543 LP_SAMPLER_LOD_CONTROL_SHIFT; 3544 3545 get_target_info(static_texture_state->target, 3546 &num_coords, &num_derivs, &num_offsets, &layer); 3547 3548 if (dynamic_state->cache_ptr) { 3549 const struct util_format_description *format_desc; 3550 format_desc = util_format_description(static_texture_state->format); 3551 if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 3552 /* 3553 * This is not 100% correct, if we have cache but the 3554 * util_format_s3tc_prefer is true the cache won't get used 3555 * regardless (could hook up the block decode there...) */ 3556 need_cache = TRUE; 3557 } 3558 } 3559 /* 3560 * texture function matches are found by name. 3561 * Thus the name has to include both the texture and sampler unit 3562 * (which covers all static state) plus the actual texture function 3563 * (including things like offsets, shadow coord, lod control). 3564 * Additionally lod_property has to be included too. 3565 */ 3566 3567 util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x", 3568 texture_index, sampler_index, sample_key); 3569 3570 function = LLVMGetNamedFunction(module, func_name); 3571 3572 if(!function) { 3573 LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS]; 3574 LLVMTypeRef ret_type; 3575 LLVMTypeRef function_type; 3576 LLVMTypeRef val_type[4]; 3577 unsigned num_param = 0; 3578 3579 /* 3580 * Generate the function prototype. 3581 */ 3582 3583 arg_types[num_param++] = LLVMTypeOf(params->context_ptr); 3584 if (need_cache) { 3585 arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr); 3586 } 3587 for (i = 0; i < num_coords; i++) { 3588 arg_types[num_param++] = LLVMTypeOf(coords[0]); 3589 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i])); 3590 } 3591 if (layer) { 3592 arg_types[num_param++] = LLVMTypeOf(coords[layer]); 3593 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer])); 3594 } 3595 if (sample_key & LP_SAMPLER_SHADOW) { 3596 arg_types[num_param++] = LLVMTypeOf(coords[0]); 3597 } 3598 if (sample_key & LP_SAMPLER_OFFSETS) { 3599 for (i = 0; i < num_offsets; i++) { 3600 arg_types[num_param++] = LLVMTypeOf(offsets[0]); 3601 assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i])); 3602 } 3603 } 3604 if (lod_control == LP_SAMPLER_LOD_BIAS || 3605 lod_control == LP_SAMPLER_LOD_EXPLICIT) { 3606 arg_types[num_param++] = LLVMTypeOf(params->lod); 3607 } 3608 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { 3609 for (i = 0; i < num_derivs; i++) { 3610 arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]); 3611 arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]); 3612 assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i])); 3613 assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i])); 3614 } 3615 } 3616 3617 val_type[0] = val_type[1] = val_type[2] = val_type[3] = 3618 lp_build_vec_type(gallivm, params->type); 3619 ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0); 3620 function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0); 3621 function = LLVMAddFunction(module, func_name, function_type); 3622 3623 for (i = 0; i < num_param; ++i) { 3624 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) { 3625 3626 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); 3627 } 3628 } 3629 3630 LLVMSetFunctionCallConv(function, LLVMFastCallConv); 3631 LLVMSetLinkage(function, LLVMInternalLinkage); 3632 3633 lp_build_sample_gen_func(gallivm, 3634 static_texture_state, 3635 static_sampler_state, 3636 dynamic_state, 3637 params->type, 3638 texture_index, 3639 sampler_index, 3640 function, 3641 num_param, 3642 sample_key); 3643 } 3644 3645 num_args = 0; 3646 args[num_args++] = params->context_ptr; 3647 if (need_cache) { 3648 args[num_args++] = params->thread_data_ptr; 3649 } 3650 for (i = 0; i < num_coords; i++) { 3651 args[num_args++] = coords[i]; 3652 } 3653 if (layer) { 3654 args[num_args++] = coords[layer]; 3655 } 3656 if (sample_key & LP_SAMPLER_SHADOW) { 3657 args[num_args++] = coords[4]; 3658 } 3659 if (sample_key & LP_SAMPLER_OFFSETS) { 3660 for (i = 0; i < num_offsets; i++) { 3661 args[num_args++] = offsets[i]; 3662 } 3663 } 3664 if (lod_control == LP_SAMPLER_LOD_BIAS || 3665 lod_control == LP_SAMPLER_LOD_EXPLICIT) { 3666 args[num_args++] = params->lod; 3667 } 3668 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { 3669 for (i = 0; i < num_derivs; i++) { 3670 args[num_args++] = derivs->ddx[i]; 3671 args[num_args++] = derivs->ddy[i]; 3672 } 3673 } 3674 3675 assert(num_args <= LP_MAX_TEX_FUNC_ARGS); 3676 3677 tex_ret = LLVMBuildCall(builder, function, args, num_args, ""); 3678 bb = LLVMGetInsertBlock(builder); 3679 inst = LLVMGetLastInstruction(bb); 3680 LLVMSetInstructionCallConv(inst, LLVMFastCallConv); 3681 3682 for (i = 0; i < 4; i++) { 3683 params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, ""); 3684 } 3685 } 3686 3687 3688 /** 3689 * Build texture sampling code. 3690 * Either via a function call or inline it directly. 3691 */ 3692 void 3693 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state, 3694 const struct lp_static_sampler_state *static_sampler_state, 3695 struct lp_sampler_dynamic_state *dynamic_state, 3696 struct gallivm_state *gallivm, 3697 const struct lp_sampler_params *params) 3698 { 3699 boolean use_tex_func = FALSE; 3700 3701 /* 3702 * Do not use a function call if the sampling is "simple enough". 3703 * We define this by 3704 * a) format 3705 * b) no mips (either one level only or no mip filter) 3706 * No mips will definitely make the code smaller, though 3707 * the format requirement is a bit iffy - there's some (SoA) formats 3708 * which definitely generate less code. This does happen to catch 3709 * some important cases though which are hurt quite a bit by using 3710 * a call (though not really because of the call overhead but because 3711 * they are reusing the same texture unit with some of the same 3712 * parameters). 3713 * Ideally we'd let llvm recognize this stuff by doing IPO passes. 3714 */ 3715 3716 if (USE_TEX_FUNC_CALL) { 3717 const struct util_format_description *format_desc; 3718 boolean simple_format; 3719 boolean simple_tex; 3720 enum lp_sampler_op_type op_type; 3721 format_desc = util_format_description(static_texture_state->format); 3722 simple_format = !format_desc || 3723 (util_format_is_rgba8_variant(format_desc) && 3724 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); 3725 3726 op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >> 3727 LP_SAMPLER_OP_TYPE_SHIFT; 3728 simple_tex = 3729 op_type != LP_SAMPLER_OP_TEXTURE || 3730 ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE || 3731 static_texture_state->level_zero_only == TRUE) && 3732 static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter); 3733 3734 use_tex_func = format_desc && !(simple_format && simple_tex); 3735 } 3736 3737 if (use_tex_func) { 3738 lp_build_sample_soa_func(gallivm, 3739 static_texture_state, 3740 static_sampler_state, 3741 dynamic_state, 3742 params); 3743 } 3744 else { 3745 lp_build_sample_soa_code(gallivm, 3746 static_texture_state, 3747 static_sampler_state, 3748 dynamic_state, 3749 params->type, 3750 params->sample_key, 3751 params->texture_index, 3752 params->sampler_index, 3753 params->context_ptr, 3754 params->thread_data_ptr, 3755 params->coords, 3756 params->offsets, 3757 params->derivs, 3758 params->lod, 3759 params->texel); 3760 } 3761 } 3762 3763 3764 void 3765 lp_build_size_query_soa(struct gallivm_state *gallivm, 3766 const struct lp_static_texture_state *static_state, 3767 struct lp_sampler_dynamic_state *dynamic_state, 3768 const struct lp_sampler_size_query_params *params) 3769 { 3770 LLVMValueRef lod, level = 0, size; 3771 LLVMValueRef first_level = NULL; 3772 int dims, i; 3773 boolean has_array; 3774 unsigned num_lods = 1; 3775 struct lp_build_context bld_int_vec4; 3776 LLVMValueRef context_ptr = params->context_ptr; 3777 unsigned texture_unit = params->texture_unit; 3778 unsigned target = params->target; 3779 3780 if (static_state->format == PIPE_FORMAT_NONE) { 3781 /* 3782 * If there's nothing bound, format is NONE, and we must return 3783 * all zero as mandated by d3d10 in this case. 3784 */ 3785 unsigned chan; 3786 LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F); 3787 for (chan = 0; chan < 4; chan++) { 3788 params->sizes_out[chan] = zero; 3789 } 3790 return; 3791 } 3792 3793 /* 3794 * Do some sanity verification about bound texture and shader dcl target. 3795 * Not entirely sure what's possible but assume array/non-array 3796 * always compatible (probably not ok for OpenGL but d3d10 has no 3797 * distinction of arrays at the resource level). 3798 * Everything else looks bogus (though not entirely sure about rect/2d). 3799 * Currently disabled because it causes assertion failures if there's 3800 * nothing bound (or rather a dummy texture, not that this case would 3801 * return the right values). 3802 */ 3803 if (0 && static_state->target != target) { 3804 if (static_state->target == PIPE_TEXTURE_1D) 3805 assert(target == PIPE_TEXTURE_1D_ARRAY); 3806 else if (static_state->target == PIPE_TEXTURE_1D_ARRAY) 3807 assert(target == PIPE_TEXTURE_1D); 3808 else if (static_state->target == PIPE_TEXTURE_2D) 3809 assert(target == PIPE_TEXTURE_2D_ARRAY); 3810 else if (static_state->target == PIPE_TEXTURE_2D_ARRAY) 3811 assert(target == PIPE_TEXTURE_2D); 3812 else if (static_state->target == PIPE_TEXTURE_CUBE) 3813 assert(target == PIPE_TEXTURE_CUBE_ARRAY); 3814 else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY) 3815 assert(target == PIPE_TEXTURE_CUBE); 3816 else 3817 assert(0); 3818 } 3819 3820 dims = texture_dims(target); 3821 3822 switch (target) { 3823 case PIPE_TEXTURE_1D_ARRAY: 3824 case PIPE_TEXTURE_2D_ARRAY: 3825 case PIPE_TEXTURE_CUBE_ARRAY: 3826 has_array = TRUE; 3827 break; 3828 default: 3829 has_array = FALSE; 3830 break; 3831 } 3832 3833 assert(!params->int_type.floating); 3834 3835 lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128)); 3836 3837 if (params->explicit_lod) { 3838 /* FIXME: this needs to honor per-element lod */ 3839 lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod, 3840 lp_build_const_int32(gallivm, 0), ""); 3841 first_level = dynamic_state->first_level(dynamic_state, gallivm, 3842 context_ptr, texture_unit); 3843 level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level"); 3844 lod = lp_build_broadcast_scalar(&bld_int_vec4, level); 3845 } else { 3846 lod = bld_int_vec4.zero; 3847 } 3848 3849 size = bld_int_vec4.undef; 3850 3851 size = LLVMBuildInsertElement(gallivm->builder, size, 3852 dynamic_state->width(dynamic_state, gallivm, 3853 context_ptr, texture_unit), 3854 lp_build_const_int32(gallivm, 0), ""); 3855 3856 if (dims >= 2) { 3857 size = LLVMBuildInsertElement(gallivm->builder, size, 3858 dynamic_state->height(dynamic_state, gallivm, 3859 context_ptr, texture_unit), 3860 lp_build_const_int32(gallivm, 1), ""); 3861 } 3862 3863 if (dims >= 3) { 3864 size = LLVMBuildInsertElement(gallivm->builder, size, 3865 dynamic_state->depth(dynamic_state, gallivm, 3866 context_ptr, texture_unit), 3867 lp_build_const_int32(gallivm, 2), ""); 3868 } 3869 3870 size = lp_build_minify(&bld_int_vec4, size, lod, TRUE); 3871 3872 if (has_array) { 3873 LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm, 3874 context_ptr, texture_unit); 3875 if (target == PIPE_TEXTURE_CUBE_ARRAY) { 3876 /* 3877 * It looks like GL wants number of cubes, d3d10.1 has it undefined? 3878 * Could avoid this by passing in number of cubes instead of total 3879 * number of layers (might make things easier elsewhere too). 3880 */ 3881 LLVMValueRef six = lp_build_const_int32(gallivm, 6); 3882 layers = LLVMBuildSDiv(gallivm->builder, layers, six, ""); 3883 } 3884 size = LLVMBuildInsertElement(gallivm->builder, size, layers, 3885 lp_build_const_int32(gallivm, dims), ""); 3886 } 3887 3888 /* 3889 * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels) 3890 * if level is out of bounds (note this can't cover unbound texture 3891 * here, which also requires returning zero). 3892 */ 3893 if (params->explicit_lod && params->is_sviewinfo) { 3894 LLVMValueRef last_level, out, out1; 3895 struct lp_build_context leveli_bld; 3896 3897 /* everything is scalar for now */ 3898 lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32)); 3899 last_level = dynamic_state->last_level(dynamic_state, gallivm, 3900 context_ptr, texture_unit); 3901 3902 out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level); 3903 out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level); 3904 out = lp_build_or(&leveli_bld, out, out1); 3905 if (num_lods == 1) { 3906 out = lp_build_broadcast_scalar(&bld_int_vec4, out); 3907 } 3908 else { 3909 /* TODO */ 3910 assert(0); 3911 } 3912 size = lp_build_andnot(&bld_int_vec4, size, out); 3913 } 3914 for (i = 0; i < dims + (has_array ? 1 : 0); i++) { 3915 params->sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, params->int_type, 3916 size, 3917 lp_build_const_int32(gallivm, i)); 3918 } 3919 if (params->is_sviewinfo) { 3920 for (; i < 4; i++) { 3921 params->sizes_out[i] = lp_build_const_vec(gallivm, params->int_type, 0.0); 3922 } 3923 } 3924 3925 /* 3926 * if there's no explicit_lod (buffers, rects) queries requiring nr of 3927 * mips would be illegal. 3928 */ 3929 if (params->is_sviewinfo && params->explicit_lod) { 3930 struct lp_build_context bld_int_scalar; 3931 LLVMValueRef num_levels; 3932 lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32)); 3933 3934 if (static_state->level_zero_only) { 3935 num_levels = bld_int_scalar.one; 3936 } 3937 else { 3938 LLVMValueRef last_level; 3939 3940 last_level = dynamic_state->last_level(dynamic_state, gallivm, 3941 context_ptr, texture_unit); 3942 num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level); 3943 num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one); 3944 } 3945 params->sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type), 3946 num_levels); 3947 } 3948 } 3949