/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Texture sampling -- SoA.
 *
 * @author Jose Fonseca <jfonseca (at) vmware.com>
 * @author Brian Paul <brianp (at) vmware.com>
 */

#include "pipe/p_defines.h"
#include "pipe/p_state.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_dump.h"
#include "util/u_memory.h"
#include "util/u_math.h"
#include "util/u_format.h"
#include "util/u_cpu_detect.h"
#include "util/format_rgb9e5.h"
#include "lp_bld_debug.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_logic.h"
#include "lp_bld_printf.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_gather.h"
#include "lp_bld_format.h"
#include "lp_bld_sample.h"
#include "lp_bld_sample_aos.h"
#include "lp_bld_struct.h"
#include "lp_bld_quad.h"
#include "lp_bld_pack.h"
#include "lp_bld_intr.h"


/**
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
 * The computation depends on whether the texture is 1D, 2D or 3D.
69 * The result, texel, will be float vectors: 70 * texel[0] = red values 71 * texel[1] = green values 72 * texel[2] = blue values 73 * texel[3] = alpha values 74 */ 75 static void 76 lp_build_sample_texel_soa(struct lp_build_sample_context *bld, 77 LLVMValueRef width, 78 LLVMValueRef height, 79 LLVMValueRef depth, 80 LLVMValueRef x, 81 LLVMValueRef y, 82 LLVMValueRef z, 83 LLVMValueRef y_stride, 84 LLVMValueRef z_stride, 85 LLVMValueRef data_ptr, 86 LLVMValueRef mipoffsets, 87 LLVMValueRef texel_out[4]) 88 { 89 const struct lp_static_sampler_state *static_state = bld->static_sampler_state; 90 const unsigned dims = bld->dims; 91 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 92 LLVMBuilderRef builder = bld->gallivm->builder; 93 LLVMValueRef offset; 94 LLVMValueRef i, j; 95 LLVMValueRef use_border = NULL; 96 97 /* use_border = x < 0 || x >= width || y < 0 || y >= height */ 98 if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s, 99 static_state->min_img_filter, 100 static_state->mag_img_filter)) { 101 LLVMValueRef b1, b2; 102 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); 103 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); 104 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); 105 } 106 107 if (dims >= 2 && 108 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t, 109 static_state->min_img_filter, 110 static_state->mag_img_filter)) { 111 LLVMValueRef b1, b2; 112 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); 113 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); 114 if (use_border) { 115 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); 116 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); 117 } 118 else { 119 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); 120 } 121 } 122 123 if (dims == 3 && 124 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r, 125 static_state->min_img_filter, 126 
static_state->mag_img_filter)) { 127 LLVMValueRef b1, b2; 128 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); 129 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); 130 if (use_border) { 131 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); 132 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); 133 } 134 else { 135 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); 136 } 137 } 138 139 /* convert x,y,z coords to linear offset from start of texture, in bytes */ 140 lp_build_sample_offset(&bld->int_coord_bld, 141 bld->format_desc, 142 x, y, z, y_stride, z_stride, 143 &offset, &i, &j); 144 if (mipoffsets) { 145 offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets); 146 } 147 148 if (use_border) { 149 /* If we can sample the border color, it means that texcoords may 150 * lie outside the bounds of the texture image. We need to do 151 * something to prevent reading out of bounds and causing a segfault. 152 * 153 * Simply AND the texture coords with !use_border. This will cause 154 * coords which are out of bounds to become zero. Zero's guaranteed 155 * to be inside the texture image. 156 */ 157 offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border); 158 } 159 160 lp_build_fetch_rgba_soa(bld->gallivm, 161 bld->format_desc, 162 bld->texel_type, TRUE, 163 data_ptr, offset, 164 i, j, 165 bld->cache, 166 texel_out); 167 168 /* 169 * Note: if we find an app which frequently samples the texture border 170 * we might want to implement a true conditional here to avoid sampling 171 * the texture whenever possible (since that's quite a bit of code). 172 * Ex: 173 * if (use_border) { 174 * texel = border_color; 175 * } 176 * else { 177 * texel = sample_texture(coord); 178 * } 179 * As it is now, we always sample the texture, then selectively replace 180 * the texel color results with the border color. 
181 */ 182 183 if (use_border) { 184 /* select texel color or border color depending on use_border. */ 185 const struct util_format_description *format_desc = bld->format_desc; 186 int chan; 187 struct lp_type border_type = bld->texel_type; 188 border_type.length = 4; 189 /* 190 * Only replace channels which are actually present. The others should 191 * get optimized away eventually by sampler_view swizzle anyway but it's 192 * easier too. 193 */ 194 for (chan = 0; chan < 4; chan++) { 195 unsigned chan_s; 196 /* reverse-map channel... */ 197 for (chan_s = 0; chan_s < 4; chan_s++) { 198 if (chan_s == format_desc->swizzle[chan]) { 199 break; 200 } 201 } 202 if (chan_s <= 3) { 203 /* use the already clamped color */ 204 LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan); 205 LLVMValueRef border_chan; 206 207 border_chan = lp_build_extract_broadcast(bld->gallivm, 208 border_type, 209 bld->texel_type, 210 bld->border_color_clamped, 211 idx); 212 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border, 213 border_chan, texel_out[chan]); 214 } 215 } 216 } 217 } 218 219 220 /** 221 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes. 222 */ 223 static LLVMValueRef 224 lp_build_coord_mirror(struct lp_build_sample_context *bld, 225 LLVMValueRef coord) 226 { 227 struct lp_build_context *coord_bld = &bld->coord_bld; 228 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 229 LLVMValueRef fract, flr, isOdd; 230 231 lp_build_ifloor_fract(coord_bld, coord, &flr, &fract); 232 /* kill off NaNs */ 233 /* XXX: not safe without arch rounding, fract can be anything. 
*/ 234 fract = lp_build_max_ext(coord_bld, fract, coord_bld->zero, 235 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 236 237 /* isOdd = flr & 1 */ 238 isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, ""); 239 240 /* make coord positive or negative depending on isOdd */ 241 /* XXX slight overkill masking out sign bit is unnecessary */ 242 coord = lp_build_set_sign(coord_bld, fract, isOdd); 243 244 /* convert isOdd to float */ 245 isOdd = lp_build_int_to_float(coord_bld, isOdd); 246 247 /* add isOdd to coord */ 248 coord = lp_build_add(coord_bld, coord, isOdd); 249 250 return coord; 251 } 252 253 254 /** 255 * Helper to compute the first coord and the weight for 256 * linear wrap repeat npot textures 257 */ 258 void 259 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld, 260 LLVMValueRef coord_f, 261 LLVMValueRef length_i, 262 LLVMValueRef length_f, 263 LLVMValueRef *coord0_i, 264 LLVMValueRef *weight_f) 265 { 266 struct lp_build_context *coord_bld = &bld->coord_bld; 267 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 268 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); 269 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i, 270 int_coord_bld->one); 271 LLVMValueRef mask; 272 /* wrap with normalized floats is just fract */ 273 coord_f = lp_build_fract(coord_bld, coord_f); 274 /* mul by size and subtract 0.5 */ 275 coord_f = lp_build_mul(coord_bld, coord_f, length_f); 276 coord_f = lp_build_sub(coord_bld, coord_f, half); 277 /* 278 * we avoided the 0.5/length division before the repeat wrap, 279 * now need to fix up edge cases with selects 280 */ 281 /* 282 * Note we do a float (unordered) compare so we can eliminate NaNs. 283 * (Otherwise would need fract_safe above). 
284 */ 285 mask = lp_build_compare(coord_bld->gallivm, coord_bld->type, 286 PIPE_FUNC_LESS, coord_f, coord_bld->zero); 287 288 /* convert to int, compute lerp weight */ 289 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f); 290 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i); 291 } 292 293 294 /** 295 * Build LLVM code for texture wrap mode for linear filtering. 296 * \param x0_out returns first integer texcoord 297 * \param x1_out returns second integer texcoord 298 * \param weight_out returns linear interpolation weight 299 */ 300 static void 301 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, 302 LLVMValueRef coord, 303 LLVMValueRef length, 304 LLVMValueRef length_f, 305 LLVMValueRef offset, 306 boolean is_pot, 307 unsigned wrap_mode, 308 LLVMValueRef *x0_out, 309 LLVMValueRef *x1_out, 310 LLVMValueRef *weight_out) 311 { 312 struct lp_build_context *coord_bld = &bld->coord_bld; 313 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 314 LLVMBuilderRef builder = bld->gallivm->builder; 315 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); 316 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); 317 LLVMValueRef coord0, coord1, weight; 318 319 switch(wrap_mode) { 320 case PIPE_TEX_WRAP_REPEAT: 321 if (is_pot) { 322 /* mul by size and subtract 0.5 */ 323 coord = lp_build_mul(coord_bld, coord, length_f); 324 coord = lp_build_sub(coord_bld, coord, half); 325 if (offset) { 326 offset = lp_build_int_to_float(coord_bld, offset); 327 coord = lp_build_add(coord_bld, coord, offset); 328 } 329 /* convert to int, compute lerp weight */ 330 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 331 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 332 /* repeat wrap */ 333 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); 334 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, ""); 335 } 336 else { 337 
LLVMValueRef mask; 338 if (offset) { 339 offset = lp_build_int_to_float(coord_bld, offset); 340 offset = lp_build_div(coord_bld, offset, length_f); 341 coord = lp_build_add(coord_bld, coord, offset); 342 } 343 lp_build_coord_repeat_npot_linear(bld, coord, 344 length, length_f, 345 &coord0, &weight); 346 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, 347 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); 348 coord1 = LLVMBuildAnd(builder, 349 lp_build_add(int_coord_bld, coord0, int_coord_bld->one), 350 mask, ""); 351 } 352 break; 353 354 case PIPE_TEX_WRAP_CLAMP: 355 if (bld->static_sampler_state->normalized_coords) { 356 /* scale coord to length */ 357 coord = lp_build_mul(coord_bld, coord, length_f); 358 } 359 if (offset) { 360 offset = lp_build_int_to_float(coord_bld, offset); 361 coord = lp_build_add(coord_bld, coord, offset); 362 } 363 364 /* clamp to [0, length] */ 365 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f); 366 367 coord = lp_build_sub(coord_bld, coord, half); 368 369 /* convert to int, compute lerp weight */ 370 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 371 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 372 break; 373 374 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 375 { 376 struct lp_build_context abs_coord_bld = bld->coord_bld; 377 abs_coord_bld.type.sign = FALSE; 378 379 if (bld->static_sampler_state->normalized_coords) { 380 /* mul by tex size */ 381 coord = lp_build_mul(coord_bld, coord, length_f); 382 } 383 if (offset) { 384 offset = lp_build_int_to_float(coord_bld, offset); 385 coord = lp_build_add(coord_bld, coord, offset); 386 } 387 388 /* clamp to length max */ 389 coord = lp_build_min_ext(coord_bld, coord, length_f, 390 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 391 /* subtract 0.5 */ 392 coord = lp_build_sub(coord_bld, coord, half); 393 /* clamp to [0, length - 0.5] */ 394 coord = lp_build_max(coord_bld, coord, coord_bld->zero); 395 /* convert to int, compute lerp weight */ 
396 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); 397 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 398 /* coord1 = min(coord1, length-1) */ 399 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 400 break; 401 } 402 403 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 404 if (bld->static_sampler_state->normalized_coords) { 405 /* scale coord to length */ 406 coord = lp_build_mul(coord_bld, coord, length_f); 407 } 408 if (offset) { 409 offset = lp_build_int_to_float(coord_bld, offset); 410 coord = lp_build_add(coord_bld, coord, offset); 411 } 412 /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */ 413 /* can skip clamp (though might not work for very large coord values) */ 414 coord = lp_build_sub(coord_bld, coord, half); 415 /* convert to int, compute lerp weight */ 416 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 417 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 418 break; 419 420 case PIPE_TEX_WRAP_MIRROR_REPEAT: 421 if (offset) { 422 offset = lp_build_int_to_float(coord_bld, offset); 423 offset = lp_build_div(coord_bld, offset, length_f); 424 coord = lp_build_add(coord_bld, coord, offset); 425 } 426 /* compute mirror function */ 427 coord = lp_build_coord_mirror(bld, coord); 428 429 /* scale coord to length */ 430 coord = lp_build_mul(coord_bld, coord, length_f); 431 coord = lp_build_sub(coord_bld, coord, half); 432 433 /* convert to int, compute lerp weight */ 434 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 435 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 436 437 /* coord0 = max(coord0, 0) */ 438 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); 439 /* coord1 = min(coord1, length-1) */ 440 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 441 break; 442 443 case PIPE_TEX_WRAP_MIRROR_CLAMP: 444 if (bld->static_sampler_state->normalized_coords) { 445 /* scale coord to length */ 446 coord = lp_build_mul(coord_bld, coord, 
length_f); 447 } 448 if (offset) { 449 offset = lp_build_int_to_float(coord_bld, offset); 450 coord = lp_build_add(coord_bld, coord, offset); 451 } 452 coord = lp_build_abs(coord_bld, coord); 453 454 /* clamp to [0, length] */ 455 coord = lp_build_min(coord_bld, coord, length_f); 456 457 coord = lp_build_sub(coord_bld, coord, half); 458 459 /* convert to int, compute lerp weight */ 460 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 461 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 462 break; 463 464 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 465 { 466 struct lp_build_context abs_coord_bld = bld->coord_bld; 467 abs_coord_bld.type.sign = FALSE; 468 469 if (bld->static_sampler_state->normalized_coords) { 470 /* scale coord to length */ 471 coord = lp_build_mul(coord_bld, coord, length_f); 472 } 473 if (offset) { 474 offset = lp_build_int_to_float(coord_bld, offset); 475 coord = lp_build_add(coord_bld, coord, offset); 476 } 477 coord = lp_build_abs(coord_bld, coord); 478 479 /* clamp to length max */ 480 coord = lp_build_min_ext(coord_bld, coord, length_f, 481 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); 482 /* subtract 0.5 */ 483 coord = lp_build_sub(coord_bld, coord, half); 484 /* clamp to [0, length - 0.5] */ 485 coord = lp_build_max(coord_bld, coord, coord_bld->zero); 486 487 /* convert to int, compute lerp weight */ 488 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); 489 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 490 /* coord1 = min(coord1, length-1) */ 491 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 492 } 493 break; 494 495 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 496 { 497 if (bld->static_sampler_state->normalized_coords) { 498 /* scale coord to length */ 499 coord = lp_build_mul(coord_bld, coord, length_f); 500 } 501 if (offset) { 502 offset = lp_build_int_to_float(coord_bld, offset); 503 coord = lp_build_add(coord_bld, coord, offset); 504 } 505 coord = 
lp_build_abs(coord_bld, coord); 506 507 /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */ 508 /* skip clamp - always positive, and other side 509 only potentially matters for very large coords */ 510 coord = lp_build_sub(coord_bld, coord, half); 511 512 /* convert to int, compute lerp weight */ 513 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 514 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 515 } 516 break; 517 518 default: 519 assert(0); 520 coord0 = NULL; 521 coord1 = NULL; 522 weight = NULL; 523 } 524 525 *x0_out = coord0; 526 *x1_out = coord1; 527 *weight_out = weight; 528 } 529 530 531 /** 532 * Build LLVM code for texture wrap mode for nearest filtering. 533 * \param coord the incoming texcoord (nominally in [0,1]) 534 * \param length the texture size along one dimension, as int vector 535 * \param length_f the texture size along one dimension, as float vector 536 * \param offset texel offset along one dimension (as int vector) 537 * \param is_pot if TRUE, length is a power of two 538 * \param wrap_mode one of PIPE_TEX_WRAP_x 539 */ 540 static LLVMValueRef 541 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, 542 LLVMValueRef coord, 543 LLVMValueRef length, 544 LLVMValueRef length_f, 545 LLVMValueRef offset, 546 boolean is_pot, 547 unsigned wrap_mode) 548 { 549 struct lp_build_context *coord_bld = &bld->coord_bld; 550 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 551 LLVMBuilderRef builder = bld->gallivm->builder; 552 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); 553 LLVMValueRef icoord; 554 555 switch(wrap_mode) { 556 case PIPE_TEX_WRAP_REPEAT: 557 if (is_pot) { 558 coord = lp_build_mul(coord_bld, coord, length_f); 559 icoord = lp_build_ifloor(coord_bld, coord); 560 if (offset) { 561 icoord = lp_build_add(int_coord_bld, icoord, offset); 562 } 563 icoord = LLVMBuildAnd(builder, icoord, length_minus_one, ""); 564 } 565 else { 566 if (offset) { 567 
offset = lp_build_int_to_float(coord_bld, offset); 568 offset = lp_build_div(coord_bld, offset, length_f); 569 coord = lp_build_add(coord_bld, coord, offset); 570 } 571 /* take fraction, unnormalize */ 572 coord = lp_build_fract_safe(coord_bld, coord); 573 coord = lp_build_mul(coord_bld, coord, length_f); 574 icoord = lp_build_itrunc(coord_bld, coord); 575 } 576 break; 577 578 case PIPE_TEX_WRAP_CLAMP: 579 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 580 if (bld->static_sampler_state->normalized_coords) { 581 /* scale coord to length */ 582 coord = lp_build_mul(coord_bld, coord, length_f); 583 } 584 585 if (offset) { 586 offset = lp_build_int_to_float(coord_bld, offset); 587 coord = lp_build_add(coord_bld, coord, offset); 588 } 589 /* floor */ 590 /* use itrunc instead since we clamp to 0 anyway */ 591 icoord = lp_build_itrunc(coord_bld, coord); 592 593 /* clamp to [0, length - 1]. */ 594 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, 595 length_minus_one); 596 break; 597 598 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 599 if (bld->static_sampler_state->normalized_coords) { 600 /* scale coord to length */ 601 coord = lp_build_mul(coord_bld, coord, length_f); 602 } 603 /* no clamp necessary, border masking will handle this */ 604 icoord = lp_build_ifloor(coord_bld, coord); 605 if (offset) { 606 icoord = lp_build_add(int_coord_bld, icoord, offset); 607 } 608 break; 609 610 case PIPE_TEX_WRAP_MIRROR_REPEAT: 611 if (offset) { 612 offset = lp_build_int_to_float(coord_bld, offset); 613 offset = lp_build_div(coord_bld, offset, length_f); 614 coord = lp_build_add(coord_bld, coord, offset); 615 } 616 /* compute mirror function */ 617 coord = lp_build_coord_mirror(bld, coord); 618 619 /* scale coord to length */ 620 assert(bld->static_sampler_state->normalized_coords); 621 coord = lp_build_mul(coord_bld, coord, length_f); 622 623 /* itrunc == ifloor here */ 624 icoord = lp_build_itrunc(coord_bld, coord); 625 626 /* clamp to [0, length - 1] */ 627 icoord = 
lp_build_min(int_coord_bld, icoord, length_minus_one); 628 break; 629 630 case PIPE_TEX_WRAP_MIRROR_CLAMP: 631 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 632 if (bld->static_sampler_state->normalized_coords) { 633 /* scale coord to length */ 634 coord = lp_build_mul(coord_bld, coord, length_f); 635 } 636 if (offset) { 637 offset = lp_build_int_to_float(coord_bld, offset); 638 coord = lp_build_add(coord_bld, coord, offset); 639 } 640 coord = lp_build_abs(coord_bld, coord); 641 642 /* itrunc == ifloor here */ 643 icoord = lp_build_itrunc(coord_bld, coord); 644 /* 645 * Use unsigned min due to possible undef values (NaNs, overflow) 646 */ 647 { 648 struct lp_build_context abs_coord_bld = *int_coord_bld; 649 abs_coord_bld.type.sign = FALSE; 650 /* clamp to [0, length - 1] */ 651 icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one); 652 } 653 break; 654 655 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 656 if (bld->static_sampler_state->normalized_coords) { 657 /* scale coord to length */ 658 coord = lp_build_mul(coord_bld, coord, length_f); 659 } 660 if (offset) { 661 offset = lp_build_int_to_float(coord_bld, offset); 662 coord = lp_build_add(coord_bld, coord, offset); 663 } 664 coord = lp_build_abs(coord_bld, coord); 665 666 /* itrunc == ifloor here */ 667 icoord = lp_build_itrunc(coord_bld, coord); 668 break; 669 670 default: 671 assert(0); 672 icoord = NULL; 673 } 674 675 return icoord; 676 } 677 678 679 /** 680 * Do shadow test/comparison. 681 * \param p shadow ref value 682 * \param texel the texel to compare against 683 */ 684 static LLVMValueRef 685 lp_build_sample_comparefunc(struct lp_build_sample_context *bld, 686 LLVMValueRef p, 687 LLVMValueRef texel) 688 { 689 struct lp_build_context *texel_bld = &bld->texel_bld; 690 LLVMValueRef res; 691 692 if (0) { 693 //lp_build_print_value(bld->gallivm, "shadow cmp coord", p); 694 lp_build_print_value(bld->gallivm, "shadow cmp texel", texel); 695 } 696 697 /* result = (p FUNC texel) ? 
1 : 0 */ 698 /* 699 * honor d3d10 floating point rules here, which state that comparisons 700 * are ordered except NOT_EQUAL which is unordered. 701 */ 702 if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) { 703 res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func, 704 p, texel); 705 } 706 else { 707 res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func, 708 p, texel); 709 } 710 return res; 711 } 712 713 714 /** 715 * Generate code to sample a mipmap level with nearest filtering. 716 * If sampling a cube texture, r = cube face in [0,5]. 717 */ 718 static void 719 lp_build_sample_image_nearest(struct lp_build_sample_context *bld, 720 LLVMValueRef size, 721 LLVMValueRef row_stride_vec, 722 LLVMValueRef img_stride_vec, 723 LLVMValueRef data_ptr, 724 LLVMValueRef mipoffsets, 725 LLVMValueRef *coords, 726 const LLVMValueRef *offsets, 727 LLVMValueRef colors_out[4]) 728 { 729 const unsigned dims = bld->dims; 730 LLVMValueRef width_vec; 731 LLVMValueRef height_vec; 732 LLVMValueRef depth_vec; 733 LLVMValueRef flt_size; 734 LLVMValueRef flt_width_vec; 735 LLVMValueRef flt_height_vec; 736 LLVMValueRef flt_depth_vec; 737 LLVMValueRef x, y = NULL, z = NULL; 738 739 lp_build_extract_image_sizes(bld, 740 &bld->int_size_bld, 741 bld->int_coord_type, 742 size, 743 &width_vec, &height_vec, &depth_vec); 744 745 flt_size = lp_build_int_to_float(&bld->float_size_bld, size); 746 747 lp_build_extract_image_sizes(bld, 748 &bld->float_size_bld, 749 bld->coord_type, 750 flt_size, 751 &flt_width_vec, &flt_height_vec, &flt_depth_vec); 752 753 /* 754 * Compute integer texcoords. 
755 */ 756 x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec, 757 flt_width_vec, offsets[0], 758 bld->static_texture_state->pot_width, 759 bld->static_sampler_state->wrap_s); 760 lp_build_name(x, "tex.x.wrapped"); 761 762 if (dims >= 2) { 763 y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec, 764 flt_height_vec, offsets[1], 765 bld->static_texture_state->pot_height, 766 bld->static_sampler_state->wrap_t); 767 lp_build_name(y, "tex.y.wrapped"); 768 769 if (dims == 3) { 770 z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec, 771 flt_depth_vec, offsets[2], 772 bld->static_texture_state->pot_depth, 773 bld->static_sampler_state->wrap_r); 774 lp_build_name(z, "tex.z.wrapped"); 775 } 776 } 777 if (has_layer_coord(bld->static_texture_state->target)) { 778 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 779 /* add cube layer to face */ 780 z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); 781 } 782 else { 783 z = coords[2]; 784 } 785 lp_build_name(z, "tex.z.layer"); 786 } 787 788 /* 789 * Get texture colors. 790 */ 791 lp_build_sample_texel_soa(bld, 792 width_vec, height_vec, depth_vec, 793 x, y, z, 794 row_stride_vec, img_stride_vec, 795 data_ptr, mipoffsets, colors_out); 796 797 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) { 798 LLVMValueRef cmpval; 799 cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]); 800 /* this is really just a AND 1.0, cmpval but llvm is clever enough */ 801 colors_out[0] = lp_build_select(&bld->texel_bld, cmpval, 802 bld->texel_bld.one, bld->texel_bld.zero); 803 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; 804 } 805 806 } 807 808 809 /** 810 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly. 
811 */ 812 static LLVMValueRef 813 lp_build_masklerp(struct lp_build_context *bld, 814 LLVMValueRef weight, 815 LLVMValueRef mask0, 816 LLVMValueRef mask1) 817 { 818 struct gallivm_state *gallivm = bld->gallivm; 819 LLVMBuilderRef builder = gallivm->builder; 820 LLVMValueRef weight2; 821 822 weight2 = lp_build_sub(bld, bld->one, weight); 823 weight = LLVMBuildBitCast(builder, weight, 824 lp_build_int_vec_type(gallivm, bld->type), ""); 825 weight2 = LLVMBuildBitCast(builder, weight2, 826 lp_build_int_vec_type(gallivm, bld->type), ""); 827 weight = LLVMBuildAnd(builder, weight, mask1, ""); 828 weight2 = LLVMBuildAnd(builder, weight2, mask0, ""); 829 weight = LLVMBuildBitCast(builder, weight, bld->vec_type, ""); 830 weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, ""); 831 return lp_build_add(bld, weight, weight2); 832 } 833 834 /** 835 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly. 836 */ 837 static LLVMValueRef 838 lp_build_masklerp2d(struct lp_build_context *bld, 839 LLVMValueRef weight0, 840 LLVMValueRef weight1, 841 LLVMValueRef mask00, 842 LLVMValueRef mask01, 843 LLVMValueRef mask10, 844 LLVMValueRef mask11) 845 { 846 LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01); 847 LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11); 848 return lp_build_lerp(bld, weight1, val0, val1, 0); 849 } 850 851 /* 852 * this is a bit excessive code for something OpenGL just recommends 853 * but does not require. 854 */ 855 #define ACCURATE_CUBE_CORNERS 1 856 857 /** 858 * Generate code to sample a mipmap level with linear filtering. 859 * If sampling a cube texture, r = cube face in [0,5]. 860 * If linear_mask is present, only pixels having their mask set 861 * will receive linear filtering, the rest will use nearest. 
862 */ 863 static void 864 lp_build_sample_image_linear(struct lp_build_sample_context *bld, 865 boolean is_gather, 866 LLVMValueRef size, 867 LLVMValueRef linear_mask, 868 LLVMValueRef row_stride_vec, 869 LLVMValueRef img_stride_vec, 870 LLVMValueRef data_ptr, 871 LLVMValueRef mipoffsets, 872 LLVMValueRef *coords, 873 const LLVMValueRef *offsets, 874 LLVMValueRef colors_out[4]) 875 { 876 LLVMBuilderRef builder = bld->gallivm->builder; 877 struct lp_build_context *ivec_bld = &bld->int_coord_bld; 878 struct lp_build_context *coord_bld = &bld->coord_bld; 879 struct lp_build_context *texel_bld = &bld->texel_bld; 880 const unsigned dims = bld->dims; 881 LLVMValueRef width_vec; 882 LLVMValueRef height_vec; 883 LLVMValueRef depth_vec; 884 LLVMValueRef flt_size; 885 LLVMValueRef flt_width_vec; 886 LLVMValueRef flt_height_vec; 887 LLVMValueRef flt_depth_vec; 888 LLVMValueRef fall_off[4], have_corners; 889 LLVMValueRef z1 = NULL; 890 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL; 891 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL; 892 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL; 893 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL; 894 LLVMValueRef xs[4], ys[4], zs[4]; 895 LLVMValueRef neighbors[2][2][4]; 896 int chan, texel_index; 897 boolean seamless_cube_filter, accurate_cube_corners; 898 899 seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE || 900 bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && 901 bld->static_sampler_state->seamless_cube_map; 902 /* 903 * XXX I don't know how this is really supposed to work with gather. From GL 904 * spec wording (not gather specific) it sounds like the 4th missing texel 905 * should be an average of the other 3, hence for gather could return this. 906 * This is however NOT how the code here works, which just fixes up the 907 * weights used for filtering instead. And of course for gather there is 908 * no filter to tweak... 
909 */ 910 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter && 911 !is_gather; 912 913 lp_build_extract_image_sizes(bld, 914 &bld->int_size_bld, 915 bld->int_coord_type, 916 size, 917 &width_vec, &height_vec, &depth_vec); 918 919 flt_size = lp_build_int_to_float(&bld->float_size_bld, size); 920 921 lp_build_extract_image_sizes(bld, 922 &bld->float_size_bld, 923 bld->coord_type, 924 flt_size, 925 &flt_width_vec, &flt_height_vec, &flt_depth_vec); 926 927 /* 928 * Compute integer texcoords. 929 */ 930 931 if (!seamless_cube_filter) { 932 lp_build_sample_wrap_linear(bld, coords[0], width_vec, 933 flt_width_vec, offsets[0], 934 bld->static_texture_state->pot_width, 935 bld->static_sampler_state->wrap_s, 936 &x00, &x01, &s_fpart); 937 lp_build_name(x00, "tex.x0.wrapped"); 938 lp_build_name(x01, "tex.x1.wrapped"); 939 x10 = x00; 940 x11 = x01; 941 942 if (dims >= 2) { 943 lp_build_sample_wrap_linear(bld, coords[1], height_vec, 944 flt_height_vec, offsets[1], 945 bld->static_texture_state->pot_height, 946 bld->static_sampler_state->wrap_t, 947 &y00, &y10, &t_fpart); 948 lp_build_name(y00, "tex.y0.wrapped"); 949 lp_build_name(y10, "tex.y1.wrapped"); 950 y01 = y00; 951 y11 = y10; 952 953 if (dims == 3) { 954 lp_build_sample_wrap_linear(bld, coords[2], depth_vec, 955 flt_depth_vec, offsets[2], 956 bld->static_texture_state->pot_depth, 957 bld->static_sampler_state->wrap_r, 958 &z00, &z1, &r_fpart); 959 z01 = z10 = z11 = z00; 960 lp_build_name(z00, "tex.z0.wrapped"); 961 lp_build_name(z1, "tex.z1.wrapped"); 962 } 963 } 964 if (has_layer_coord(bld->static_texture_state->target)) { 965 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 966 /* add cube layer to face */ 967 z00 = z01 = z10 = z11 = z1 = 968 lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); 969 } 970 else { 971 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */ 972 } 973 lp_build_name(z00, "tex.z0.layer"); 974 lp_build_name(z1, "tex.z1.layer"); 975 } 976 } 
977 else { 978 struct lp_build_if_state edge_if; 979 LLVMTypeRef int1t; 980 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2]; 981 LLVMValueRef coord, have_edge, have_corner; 982 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y; 983 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp; 984 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped; 985 LLVMValueRef face = coords[2]; 986 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f); 987 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one); 988 /* XXX drop height calcs. Could (should) do this without seamless filtering too */ 989 height_vec = width_vec; 990 flt_height_vec = flt_width_vec; 991 992 /* XXX the overflow logic is actually sort of duplicated with trilinear, 993 * since an overflow in one mip should also have a corresponding overflow 994 * in another. 995 */ 996 /* should always have normalized coords, and offsets are undefined */ 997 assert(bld->static_sampler_state->normalized_coords); 998 coord = lp_build_mul(coord_bld, coords[0], flt_width_vec); 999 /* instead of clamp, build mask if overflowed */ 1000 coord = lp_build_sub(coord_bld, coord, half); 1001 /* convert to int, compute lerp weight */ 1002 /* not ideal with AVX (and no AVX2) */ 1003 lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart); 1004 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one); 1005 coord = lp_build_mul(coord_bld, coords[1], flt_height_vec); 1006 coord = lp_build_sub(coord_bld, coord, half); 1007 lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart); 1008 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one); 1009 1010 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero); 1011 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one); 1012 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero); 1013 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one); 1014 1015 
fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]); 1016 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]); 1017 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y); 1018 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge); 1019 1020 /* needed for accurate corner filtering branch later, rely on 0 init */ 1021 int1t = LLVMInt1TypeInContext(bld->gallivm->context); 1022 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner"); 1023 1024 for (texel_index = 0; texel_index < 4; texel_index++) { 1025 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs"); 1026 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys"); 1027 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs"); 1028 } 1029 1030 lp_build_if(&edge_if, bld->gallivm, have_edge); 1031 1032 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y); 1033 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner); 1034 LLVMBuildStore(builder, have_corner, have_corners); 1035 1036 /* 1037 * Need to feed clamped values here for cheap corner handling, 1038 * but only for y coord (as when falling off both edges we only 1039 * fall off the x one) - this should be sufficient. 1040 */ 1041 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero); 1042 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one); 1043 1044 /* 1045 * Get all possible new coords. 
1046 */ 1047 lp_build_cube_new_coords(ivec_bld, face, 1048 x0, x1, y0_clamped, y1_clamped, 1049 length_minus_one, 1050 new_faces, new_xcoords, new_ycoords); 1051 1052 /* handle fall off x-, x+ direction */ 1053 /* determine new coords, face (not both fall_off vars can be true at same time) */ 1054 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0); 1055 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped); 1056 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0); 1057 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped); 1058 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1); 1059 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped); 1060 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1); 1061 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped); 1062 1063 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face); 1064 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face); 1065 1066 /* handle fall off y-, y+ direction */ 1067 /* 1068 * Cheap corner logic: just hack up things so a texel doesn't fall 1069 * off both sides (which means filter weights will be wrong but we'll only 1070 * use valid texels in the filter). 1071 * This means however (y) coords must additionally be clamped (see above). 1072 * This corner handling should be fully OpenGL (but not d3d10) compliant. 
1073 */ 1074 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]); 1075 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]); 1076 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]); 1077 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]); 1078 1079 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00); 1080 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00); 1081 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01); 1082 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01); 1083 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10); 1084 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10); 1085 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11); 1086 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11); 1087 1088 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00); 1089 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01); 1090 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10); 1091 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11); 1092 1093 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 1094 /* now can add cube layer to face (per sample) */ 1095 z00 = lp_build_add(ivec_bld, z00, coords[3]); 1096 z01 = lp_build_add(ivec_bld, z01, coords[3]); 1097 z10 = lp_build_add(ivec_bld, z10, coords[3]); 1098 z11 = lp_build_add(ivec_bld, z11, coords[3]); 1099 } 1100 1101 LLVMBuildStore(builder, x00, xs[0]); 1102 LLVMBuildStore(builder, x01, xs[1]); 1103 LLVMBuildStore(builder, x10, xs[2]); 1104 LLVMBuildStore(builder, x11, xs[3]); 1105 LLVMBuildStore(builder, y00, ys[0]); 1106 LLVMBuildStore(builder, y01, ys[1]); 1107 LLVMBuildStore(builder, y10, ys[2]); 1108 LLVMBuildStore(builder, y11, ys[3]); 1109 LLVMBuildStore(builder, 
z00, zs[0]); 1110 LLVMBuildStore(builder, z01, zs[1]); 1111 LLVMBuildStore(builder, z10, zs[2]); 1112 LLVMBuildStore(builder, z11, zs[3]); 1113 1114 lp_build_else(&edge_if); 1115 1116 LLVMBuildStore(builder, x0, xs[0]); 1117 LLVMBuildStore(builder, x1, xs[1]); 1118 LLVMBuildStore(builder, x0, xs[2]); 1119 LLVMBuildStore(builder, x1, xs[3]); 1120 LLVMBuildStore(builder, y0, ys[0]); 1121 LLVMBuildStore(builder, y0, ys[1]); 1122 LLVMBuildStore(builder, y1, ys[2]); 1123 LLVMBuildStore(builder, y1, ys[3]); 1124 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 1125 LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]); 1126 LLVMBuildStore(builder, cube_layer, zs[0]); 1127 LLVMBuildStore(builder, cube_layer, zs[1]); 1128 LLVMBuildStore(builder, cube_layer, zs[2]); 1129 LLVMBuildStore(builder, cube_layer, zs[3]); 1130 } 1131 else { 1132 LLVMBuildStore(builder, face, zs[0]); 1133 LLVMBuildStore(builder, face, zs[1]); 1134 LLVMBuildStore(builder, face, zs[2]); 1135 LLVMBuildStore(builder, face, zs[3]); 1136 } 1137 1138 lp_build_endif(&edge_if); 1139 1140 x00 = LLVMBuildLoad(builder, xs[0], ""); 1141 x01 = LLVMBuildLoad(builder, xs[1], ""); 1142 x10 = LLVMBuildLoad(builder, xs[2], ""); 1143 x11 = LLVMBuildLoad(builder, xs[3], ""); 1144 y00 = LLVMBuildLoad(builder, ys[0], ""); 1145 y01 = LLVMBuildLoad(builder, ys[1], ""); 1146 y10 = LLVMBuildLoad(builder, ys[2], ""); 1147 y11 = LLVMBuildLoad(builder, ys[3], ""); 1148 z00 = LLVMBuildLoad(builder, zs[0], ""); 1149 z01 = LLVMBuildLoad(builder, zs[1], ""); 1150 z10 = LLVMBuildLoad(builder, zs[2], ""); 1151 z11 = LLVMBuildLoad(builder, zs[3], ""); 1152 } 1153 1154 if (linear_mask) { 1155 /* 1156 * Whack filter weights into place. Whatever texel had more weight is 1157 * the one which should have been selected by nearest filtering hence 1158 * just use 100% weight for it. 
1159 */ 1160 struct lp_build_context *c_bld = &bld->coord_bld; 1161 LLVMValueRef w1_mask, w1_weight; 1162 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f); 1163 1164 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half); 1165 /* this select is really just a "and" */ 1166 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero); 1167 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight); 1168 if (dims >= 2) { 1169 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half); 1170 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero); 1171 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight); 1172 if (dims == 3) { 1173 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half); 1174 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero); 1175 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight); 1176 } 1177 } 1178 } 1179 1180 /* 1181 * Get texture colors. 1182 */ 1183 /* get x0/x1 texels */ 1184 lp_build_sample_texel_soa(bld, 1185 width_vec, height_vec, depth_vec, 1186 x00, y00, z00, 1187 row_stride_vec, img_stride_vec, 1188 data_ptr, mipoffsets, neighbors[0][0]); 1189 lp_build_sample_texel_soa(bld, 1190 width_vec, height_vec, depth_vec, 1191 x01, y01, z01, 1192 row_stride_vec, img_stride_vec, 1193 data_ptr, mipoffsets, neighbors[0][1]); 1194 1195 if (dims == 1) { 1196 assert(!is_gather); 1197 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { 1198 /* Interpolate two samples from 1D image to produce one color */ 1199 for (chan = 0; chan < 4; chan++) { 1200 colors_out[chan] = lp_build_lerp(texel_bld, s_fpart, 1201 neighbors[0][0][chan], 1202 neighbors[0][1][chan], 1203 0); 1204 } 1205 } 1206 else { 1207 LLVMValueRef cmpval0, cmpval1; 1208 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); 1209 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); 1210 /* simplified lerp, AND mask with 
weight and add */ 1211 colors_out[0] = lp_build_masklerp(texel_bld, s_fpart, 1212 cmpval0, cmpval1); 1213 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; 1214 } 1215 } 1216 else { 1217 /* 2D/3D texture */ 1218 struct lp_build_if_state corner_if; 1219 LLVMValueRef colors0[4], colorss[4]; 1220 1221 /* get x0/x1 texels at y1 */ 1222 lp_build_sample_texel_soa(bld, 1223 width_vec, height_vec, depth_vec, 1224 x10, y10, z10, 1225 row_stride_vec, img_stride_vec, 1226 data_ptr, mipoffsets, neighbors[1][0]); 1227 lp_build_sample_texel_soa(bld, 1228 width_vec, height_vec, depth_vec, 1229 x11, y11, z11, 1230 row_stride_vec, img_stride_vec, 1231 data_ptr, mipoffsets, neighbors[1][1]); 1232 1233 /* 1234 * To avoid having to duplicate linear_mask / fetch code use 1235 * another branch (with corner condition though edge would work 1236 * as well) here. 1237 */ 1238 if (accurate_cube_corners) { 1239 LLVMValueRef w00, w01, w10, w11, wx0, wy0; 1240 LLVMValueRef c_weight, c00, c01, c10, c11; 1241 LLVMValueRef have_corner, one_third, tmp; 1242 1243 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); 1244 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); 1245 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); 1246 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); 1247 1248 have_corner = LLVMBuildLoad(builder, have_corners, ""); 1249 1250 lp_build_if(&corner_if, bld->gallivm, have_corner); 1251 1252 /* 1253 * we can't use standard 2d lerp as we need per-element weight 1254 * in case of corners, so just calculate bilinear result as 1255 * w00*s00 + w01*s01 + w10*s10 + w11*s11. 1256 * (This is actually less work than using 2d lerp, 7 vs. 9 instructions, 1257 * however calculating the weights needs another 6, so actually probably 1258 * not slower than 2d lerp only for 4 channels as weights only need 1259 * to be calculated once - of course fixing the weights has additional cost.) 
1260 */ 1261 wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart); 1262 wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart); 1263 w00 = lp_build_mul(coord_bld, wx0, wy0); 1264 w01 = lp_build_mul(coord_bld, s_fpart, wy0); 1265 w10 = lp_build_mul(coord_bld, wx0, t_fpart); 1266 w11 = lp_build_mul(coord_bld, s_fpart, t_fpart); 1267 1268 /* find corner weight */ 1269 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]); 1270 c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero); 1271 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]); 1272 c_weight = lp_build_select(coord_bld, c01, w01, c_weight); 1273 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]); 1274 c_weight = lp_build_select(coord_bld, c10, w10, c_weight); 1275 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]); 1276 c_weight = lp_build_select(coord_bld, c11, w11, c_weight); 1277 1278 /* 1279 * add 1/3 of the corner weight to each of the 3 other samples 1280 * and null out corner weight 1281 */ 1282 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f); 1283 c_weight = lp_build_mul(coord_bld, c_weight, one_third); 1284 w00 = lp_build_add(coord_bld, w00, c_weight); 1285 c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, ""); 1286 w00 = lp_build_andnot(coord_bld, w00, c00); 1287 w01 = lp_build_add(coord_bld, w01, c_weight); 1288 c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, ""); 1289 w01 = lp_build_andnot(coord_bld, w01, c01); 1290 w10 = lp_build_add(coord_bld, w10, c_weight); 1291 c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, ""); 1292 w10 = lp_build_andnot(coord_bld, w10, c10); 1293 w11 = lp_build_add(coord_bld, w11, c_weight); 1294 c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, ""); 1295 w11 = lp_build_andnot(coord_bld, w11, c11); 1296 1297 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { 1298 for (chan = 0; chan < 4; chan++) { 1299 colors0[chan] = lp_build_mul(coord_bld, w00, 
neighbors[0][0][chan]); 1300 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]); 1301 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); 1302 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]); 1303 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); 1304 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]); 1305 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); 1306 } 1307 } 1308 else { 1309 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; 1310 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); 1311 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); 1312 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]); 1313 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]); 1314 /* inputs to interpolation are just masks so just add masked weights together */ 1315 cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, ""); 1316 cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, ""); 1317 cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, ""); 1318 cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, ""); 1319 colors0[0] = lp_build_and(coord_bld, w00, cmpval00); 1320 tmp = lp_build_and(coord_bld, w01, cmpval01); 1321 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); 1322 tmp = lp_build_and(coord_bld, w10, cmpval10); 1323 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); 1324 tmp = lp_build_and(coord_bld, w11, cmpval11); 1325 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); 1326 colors0[1] = colors0[2] = colors0[3] = colors0[0]; 1327 } 1328 1329 LLVMBuildStore(builder, colors0[0], colorss[0]); 1330 LLVMBuildStore(builder, colors0[1], colorss[1]); 1331 LLVMBuildStore(builder, colors0[2], colorss[2]); 1332 LLVMBuildStore(builder, colors0[3], colorss[3]); 1333 1334 lp_build_else(&corner_if); 1335 } 1336 1337 if (bld->static_sampler_state->compare_mode == 
PIPE_TEX_COMPARE_NONE) { 1338 if (is_gather) { 1339 /* 1340 * Just assign the red channel (no component selection yet). 1341 * This is a bit hackish, we usually do the swizzle at the 1342 * end of sampling (much less values to swizzle), but this 1343 * obviously cannot work when using gather. 1344 */ 1345 unsigned chan_swiz = bld->static_texture_state->swizzle_r; 1346 colors0[0] = lp_build_swizzle_soa_channel(texel_bld, 1347 neighbors[1][0], 1348 chan_swiz); 1349 colors0[1] = lp_build_swizzle_soa_channel(texel_bld, 1350 neighbors[1][1], 1351 chan_swiz); 1352 colors0[2] = lp_build_swizzle_soa_channel(texel_bld, 1353 neighbors[0][1], 1354 chan_swiz); 1355 colors0[3] = lp_build_swizzle_soa_channel(texel_bld, 1356 neighbors[0][0], 1357 chan_swiz); 1358 } 1359 else { 1360 /* Bilinear interpolate the four samples from the 2D image / 3D slice */ 1361 for (chan = 0; chan < 4; chan++) { 1362 colors0[chan] = lp_build_lerp_2d(texel_bld, 1363 s_fpart, t_fpart, 1364 neighbors[0][0][chan], 1365 neighbors[0][1][chan], 1366 neighbors[1][0][chan], 1367 neighbors[1][1][chan], 1368 0); 1369 } 1370 } 1371 } 1372 else { 1373 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; 1374 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); 1375 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); 1376 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]); 1377 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]); 1378 1379 if (is_gather) { 1380 /* more hacks for swizzling, should be X, ONE or ZERO... 
*/ 1381 unsigned chan_swiz = bld->static_texture_state->swizzle_r; 1382 if (chan_swiz <= PIPE_SWIZZLE_W) { 1383 colors0[0] = lp_build_select(texel_bld, cmpval10, 1384 texel_bld->one, texel_bld->zero); 1385 colors0[1] = lp_build_select(texel_bld, cmpval11, 1386 texel_bld->one, texel_bld->zero); 1387 colors0[2] = lp_build_select(texel_bld, cmpval01, 1388 texel_bld->one, texel_bld->zero); 1389 colors0[3] = lp_build_select(texel_bld, cmpval00, 1390 texel_bld->one, texel_bld->zero); 1391 } 1392 else if (chan_swiz == PIPE_SWIZZLE_0) { 1393 colors0[0] = colors0[1] = colors0[2] = colors0[3] = 1394 texel_bld->zero; 1395 } 1396 else { 1397 colors0[0] = colors0[1] = colors0[2] = colors0[3] = 1398 texel_bld->one; 1399 } 1400 } 1401 else { 1402 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart, 1403 cmpval00, cmpval01, cmpval10, cmpval11); 1404 colors0[1] = colors0[2] = colors0[3] = colors0[0]; 1405 } 1406 } 1407 1408 if (accurate_cube_corners) { 1409 LLVMBuildStore(builder, colors0[0], colorss[0]); 1410 LLVMBuildStore(builder, colors0[1], colorss[1]); 1411 LLVMBuildStore(builder, colors0[2], colorss[2]); 1412 LLVMBuildStore(builder, colors0[3], colorss[3]); 1413 1414 lp_build_endif(&corner_if); 1415 1416 colors0[0] = LLVMBuildLoad(builder, colorss[0], ""); 1417 colors0[1] = LLVMBuildLoad(builder, colorss[1], ""); 1418 colors0[2] = LLVMBuildLoad(builder, colorss[2], ""); 1419 colors0[3] = LLVMBuildLoad(builder, colorss[3], ""); 1420 } 1421 1422 if (dims == 3) { 1423 LLVMValueRef neighbors1[2][2][4]; 1424 LLVMValueRef colors1[4]; 1425 1426 assert(!is_gather); 1427 1428 /* get x0/x1/y0/y1 texels at z1 */ 1429 lp_build_sample_texel_soa(bld, 1430 width_vec, height_vec, depth_vec, 1431 x00, y00, z1, 1432 row_stride_vec, img_stride_vec, 1433 data_ptr, mipoffsets, neighbors1[0][0]); 1434 lp_build_sample_texel_soa(bld, 1435 width_vec, height_vec, depth_vec, 1436 x01, y01, z1, 1437 row_stride_vec, img_stride_vec, 1438 data_ptr, mipoffsets, neighbors1[0][1]); 1439 
lp_build_sample_texel_soa(bld, 1440 width_vec, height_vec, depth_vec, 1441 x10, y10, z1, 1442 row_stride_vec, img_stride_vec, 1443 data_ptr, mipoffsets, neighbors1[1][0]); 1444 lp_build_sample_texel_soa(bld, 1445 width_vec, height_vec, depth_vec, 1446 x11, y11, z1, 1447 row_stride_vec, img_stride_vec, 1448 data_ptr, mipoffsets, neighbors1[1][1]); 1449 1450 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { 1451 /* Bilinear interpolate the four samples from the second Z slice */ 1452 for (chan = 0; chan < 4; chan++) { 1453 colors1[chan] = lp_build_lerp_2d(texel_bld, 1454 s_fpart, t_fpart, 1455 neighbors1[0][0][chan], 1456 neighbors1[0][1][chan], 1457 neighbors1[1][0][chan], 1458 neighbors1[1][1][chan], 1459 0); 1460 } 1461 /* Linearly interpolate the two samples from the two 3D slices */ 1462 for (chan = 0; chan < 4; chan++) { 1463 colors_out[chan] = lp_build_lerp(texel_bld, 1464 r_fpart, 1465 colors0[chan], colors1[chan], 1466 0); 1467 } 1468 } 1469 else { 1470 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; 1471 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); 1472 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); 1473 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]); 1474 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]); 1475 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart, 1476 cmpval00, cmpval01, cmpval10, cmpval11); 1477 /* Linearly interpolate the two samples from the two 3D slices */ 1478 colors_out[0] = lp_build_lerp(texel_bld, 1479 r_fpart, 1480 colors0[0], colors1[0], 1481 0); 1482 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; 1483 } 1484 } 1485 else { 1486 /* 2D tex */ 1487 for (chan = 0; chan < 4; chan++) { 1488 colors_out[chan] = colors0[chan]; 1489 } 1490 } 1491 } 1492 } 1493 1494 1495 /** 1496 * Sample the texture/mipmap using given image filter and mip filter. 
 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
 * from (vectors or scalars).
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 *
 * \param img_filter   PIPE_TEX_FILTER_NEAREST or PIPE_TEX_FILTER_LINEAR
 * \param mip_filter   PIPE_TEX_MIPFILTER_x; only LINEAR triggers the
 *                     second-level fetch and lerp below
 * \param colors_out   pointers (allocas) the result channels are stored into
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       boolean is_gather,
                       LLVMValueRef *coords,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0[4], colors1[4];
   unsigned chan;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      /* single lod: can address the mip level's data directly */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }
   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      lp_build_sample_image_nearest(bld, size0,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, mipoff0, coords, offsets,
                                    colors0);
   }
   else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
      lp_build_sample_image_linear(bld, is_gather, size0, NULL,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, mipoff0, coords, offsets,
                                   colors0);
   }

   /* Store the first level's colors in the output variables */
   for (chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         /* scalar lod: a single float compare suffices */
         need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
                                   lod_fpart, bld->lodf_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
          * We'll do mip filtering if any of the quads (or individual
          * pixel in case of per-pixel lod) need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it (if there's one lod per quad).
          */
         need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
                                      PIPE_FUNC_GREATER,
                                      lod_fpart, bld->lodf_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
      }

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld, size1,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, mipoff1, coords, offsets,
                                          colors1);
         }
         else {
            /* gather never reaches this path (no mip lerp for gather) */
            lp_build_sample_image_linear(bld, FALSE, size1, NULL,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, mipoff1, coords, offsets,
                                         colors1);
         }

         /* interpolate samples from the two mipmap levels */

         /* broadcast per-quad lod weights out to per-pixel width if needed */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}


/**
 * Sample the texture/mipmap using given mip filter, and using
 * both nearest and linear filtering at the same time depending
 * on linear_mask.
 * lod can be per quad but linear_mask is always per pixel.
 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
 * from (vectors or scalars).
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 */
static void
lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
                            LLVMValueRef linear_mask,
                            unsigned mip_filter,
                            LLVMValueRef *coords,
                            const LLVMValueRef *offsets,
                            LLVMValueRef ilevel0,
                            LLVMValueRef ilevel1,
                            LLVMValueRef lod_fpart,
                            LLVMValueRef lod_positive,
                            LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0[4], colors1[4];
   unsigned chan;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   /* always the linear path; linear_mask selects nearest weights per pixel */
   lp_build_sample_image_linear(bld, FALSE, size0, linear_mask,
                                row_stride0_vec, img_stride0_vec,
                                data_ptr0, mipoff0, coords, offsets,
                                colors0);

   /* Store the first level's colors in the output variables */
   for (chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /*
       * We'll do mip filtering if any of the quads (or individual
       * pixel in case of per-pixel lod) need it.
       * Note using lod_positive here not lod_fpart since it may be the same
       * condition as that used in the outer "if" in the caller hence llvm
       * should be able to merge the branches in this case.
       */
      need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         lp_build_sample_image_linear(bld, FALSE, size1, linear_mask,
                                      row_stride1_vec, img_stride1_vec,
                                      data_ptr1, mipoff1, coords, offsets,
                                      colors1);

         /* interpolate samples from the two mipmap levels */

         /* broadcast per-quad lod weights out to per-pixel width if needed */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}


/**
 * Build (per-coord) layer value.
 * Either clamp layer to valid values or fill in optional out_of_bounds
 * value and just return value unclamped.
1755 */ 1756 static LLVMValueRef 1757 lp_build_layer_coord(struct lp_build_sample_context *bld, 1758 unsigned texture_unit, 1759 boolean is_cube_array, 1760 LLVMValueRef layer, 1761 LLVMValueRef *out_of_bounds) 1762 { 1763 LLVMValueRef num_layers; 1764 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 1765 1766 num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm, 1767 bld->context_ptr, texture_unit); 1768 1769 if (out_of_bounds) { 1770 LLVMValueRef out1, out; 1771 assert(!is_cube_array); 1772 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers); 1773 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero); 1774 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers); 1775 *out_of_bounds = lp_build_or(int_coord_bld, out, out1); 1776 return layer; 1777 } 1778 else { 1779 LLVMValueRef maxlayer; 1780 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) : 1781 bld->int_bld.one; 1782 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s); 1783 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer); 1784 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer); 1785 } 1786 } 1787 1788 1789 /** 1790 * Calculate cube face, lod, mip levels. 
 */
static void
lp_build_sample_common(struct lp_build_sample_context *bld,
                       unsigned texture_index,
                       unsigned sampler_index,
                       LLVMValueRef *coords,
                       const struct lp_derivatives *derivs, /* optional */
                       LLVMValueRef lod_bias, /* optional */
                       LLVMValueRef explicit_lod, /* optional */
                       LLVMValueRef *lod_pos_or_zero,
                       LLVMValueRef *lod_fpart,
                       LLVMValueRef *ilevel0,
                       LLVMValueRef *ilevel1)
{
   const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
   const unsigned min_filter = bld->static_sampler_state->min_img_filter;
   const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
   const unsigned target = bld->static_texture_state->target;
   LLVMValueRef first_level, cube_rho = NULL;
   LLVMValueRef lod_ipart = NULL;
   struct lp_derivatives cube_derivs;

   /*
   printf("%s mip %d min %d mag %d\n", __FUNCTION__,
          mip_filter, min_filter, mag_filter);
   */

   /*
    * Choose cube face, recompute texcoords for the chosen face and
    * compute rho here too (as it requires transform of derivatives).
    */
   if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
      boolean need_derivs;
      /* derivatives are only needed for implicit lod with per-pixel filters */
      need_derivs = ((min_filter != mag_filter ||
                      mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
                      !bld->static_sampler_state->min_max_lod_equal &&
                      !explicit_lod);
      lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
      /* from here on, use the face-projected derivatives */
      derivs = &cube_derivs;
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* calculate cube layer coord now (6 faces per cube) */
         LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
         LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
         layer = lp_build_mul(&bld->int_coord_bld, layer, six);
         coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL);
         /* because of seamless filtering can't add it to face (coords[2]) here. */
      }
   }
   else if (target == PIPE_TEXTURE_1D_ARRAY ||
            target == PIPE_TEXTURE_2D_ARRAY) {
      /* round and clamp the array layer coord */
      coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
      coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL);
   }

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      /*
       * Clamp p coords to [0,1] for fixed function depth texture format here.
       * Technically this is not entirely correct for unorm depth as the ref value
       * should be converted to the depth format (quantization!) and comparison
       * then done in texture format. This would actually help performance (since
       * only need to do it once and could save the per-sample conversion of texels
       * to floats instead), but it would need more messy code (would need to push
       * at least some bits down to actual fetch so conversion could be skipped,
       * and would have ugly interaction with border color, would need to convert
       * border color to that format too or do some other tricks to make it work).
       */
      const struct util_format_description *format_desc = bld->format_desc;
      unsigned chan_type;
      /* not entirely sure we couldn't end up with non-valid swizzle here */
      chan_type = format_desc->swizzle[0] <= PIPE_SWIZZLE_W ?
                     format_desc->channel[format_desc->swizzle[0]].type :
                     UTIL_FORMAT_TYPE_FLOAT;
      if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
         /* fixed-point depth: clamp the compare reference (coords[4]) to [0,1] */
         coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
                                    bld->coord_bld.zero, bld->coord_bld.one);
      }
   }

   /*
    * Compute the level of detail (float).
    */
   if (min_filter != mag_filter ||
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      /* Need to compute lod either to choose mipmap levels or to
       * distinguish between minification/magnification with one mipmap level.
       */
      lp_build_lod_selector(bld, texture_index, sampler_index,
                            coords[0], coords[1], coords[2], cube_rho,
                            derivs, lod_bias, explicit_lod,
                            mip_filter,
                            &lod_ipart, lod_fpart, lod_pos_or_zero);
   } else {
      /* no lod needed; treat as lod zero (non-positive) everywhere */
      lod_ipart = bld->lodi_bld.zero;
      *lod_pos_or_zero = bld->lodi_bld.zero;
   }

   if (bld->num_lods != bld->num_mips) {
      /* only makes sense if there's just a single mip level */
      assert(bld->num_mips == 1);
      lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
   }

   /*
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
    */
   switch (mip_filter) {
   default:
      assert(0 && "bad mip_filter value in lp_build_sample_soa()");
      /* fall-through */
   case PIPE_TEX_MIPFILTER_NONE:
      /* always use mip level 0 */
      first_level = bld->dynamic_state->first_level(bld->dynamic_state,
                                                    bld->gallivm, bld->context_ptr,
                                                    texture_index);
      first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
      *ilevel0 = first_level;
      break;
   case PIPE_TEX_MIPFILTER_NEAREST:
      assert(lod_ipart);
      lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
      break;
   case PIPE_TEX_MIPFILTER_LINEAR:
      /* two adjacent levels plus the fractional weight between them */
      assert(lod_ipart);
      assert(*lod_fpart);
      lp_build_linear_mip_levels(bld, texture_index,
                                 lod_ipart, lod_fpart,
                                 ilevel0, ilevel1);
      break;
   }
}

/**
 * Clamp the (loaded) border color against the representable range of
 * the texture format and stash the result in bld->border_color_clamped.
 */
static void
lp_build_clamp_border_color(struct lp_build_sample_context *bld,
                            unsigned sampler_unit)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef border_color_ptr =
      bld->dynamic_state->border_color(bld->dynamic_state, gallivm,
                                       bld->context_ptr, sampler_unit);
   LLVMValueRef border_color;
   const struct util_format_description *format_desc = bld->format_desc;
   struct lp_type vec4_type = bld->texel_type;
   struct lp_build_context vec4_bld;
   LLVMValueRef min_clamp = NULL;
   LLVMValueRef max_clamp = NULL;

   /*
    * For normalized format need to clamp border color (technically
    * probably should also quantize the data). Really sucks doing this
    * here but can't avoid at least for now since this is part of
    * sampler state and texture format is part of sampler_view state.
    * GL also expects clamping for uint/sint formats too so
    * do that as well (d3d10 can't end up here with uint/sint since it
    * only supports them with ld).
    */
   vec4_type.length = 4;
   lp_build_context_init(&vec4_bld, gallivm, vec4_type);

   /*
    * Vectorized clamping of border color. Loading is a bit of a hack since
    * we just cast the pointer to float array to pointer to vec4
    * (int or float).
    */
   border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
                                             lp_build_const_int32(gallivm, 0));
   border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
                                       LLVMPointerType(vec4_bld.vec_type, 0), "");
   border_color = LLVMBuildLoad(builder, border_color_ptr, "");
   /* we don't have aligned type in the dynamic state unfortunately */
   LLVMSetAlignment(border_color, 4);

   /*
    * Instead of having some incredibly complex logic which will try to figure out
    * clamping necessary for each channel, simply use the first channel, and treat
    * mixed signed/unsigned normalized formats specially.
    * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
    * good reason.)
    */
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      int chan;
      /* d/s needs special handling because both present means just sampling depth */
      if (util_format_is_depth_and_stencil(format_desc->format)) {
         chan = format_desc->swizzle[0];
      }
      else {
         chan = util_format_get_first_non_void_channel(format_desc->format);
      }
      if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
         unsigned chan_type = format_desc->channel[chan].type;
         unsigned chan_norm = format_desc->channel[chan].normalized;
         unsigned chan_pure = format_desc->channel[chan].pure_integer;
         if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
            if (chan_norm) {
               /* snorm: representable range is [-1, 1] */
               min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
               max_clamp = vec4_bld.one;
            }
            else if (chan_pure) {
               /*
                * Border color was stored as int, hence need min/max clamp
                * only if chan has less than 32 bits..
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     0 - (1 << (chan_size - 1)));
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << (chan_size - 1)) - 1);
               }
            }
            /* TODO: no idea about non-pure, non-normalized! */
         }
         else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
            if (chan_norm) {
               /* unorm: representable range is [0, 1] */
               min_clamp = vec4_bld.zero;
               max_clamp = vec4_bld.one;
            }
            /*
             * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
             * we use Z32_FLOAT_S8X24 to imply sampling depth component
             * and ignoring stencil, which will blow up here if we try to
             * do a uint clamp in a float texel build...
             * And even if we had that format, mesa st also thinks using z24s8
             * means depth sampling ignoring stencil.
             */
            else if (chan_pure) {
               /*
                * Border color was stored as uint, hence never need min
                * clamp, and only need max clamp if chan has less than 32 bits.
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << chan_size) - 1);
               }
               /* TODO: no idea about non-pure, non-normalized! */
            }
         }
         else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
            /* TODO: I have no idea what clamp this would need if any! */
         }
      }
      /* mixed plain formats (or different pure size) */
      switch (format_desc->format) {
      case PIPE_FORMAT_B10G10R10A2_UINT:
      case PIPE_FORMAT_R10G10B10A2_UINT:
         {
            /* 10-bit rgb, 2-bit alpha pure uint */
            unsigned max10 = (1 << 10) - 1;
            max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
                                           max10, (1 << 2) - 1, NULL);
         }
         break;
      case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
         /* snorm rgb, unorm alpha */
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        -1.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
      case PIPE_FORMAT_R5SG5SB6U_NORM:
         /* snorm rg, unorm b (a unused/unorm) */
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        0.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      default:
         break;
      }
   }
   else {
      /* cannot figure this out from format description */
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         /* s3tc formats are always unorm */
         min_clamp = vec4_bld.zero;
         max_clamp = vec4_bld.one;
      }
      else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
               format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
         switch (format_desc->format) {
         case PIPE_FORMAT_RGTC1_UNORM:
         case PIPE_FORMAT_RGTC2_UNORM:
         case PIPE_FORMAT_LATC1_UNORM:
         case PIPE_FORMAT_LATC2_UNORM:
         case PIPE_FORMAT_ETC1_RGB8:
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_RGTC1_SNORM:
         case PIPE_FORMAT_RGTC2_SNORM:
         case PIPE_FORMAT_LATC1_SNORM:
         case PIPE_FORMAT_LATC2_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         default:
            assert(0);
            break;
         }
      }
      /*
       * all others from subsampled/other group, though we don't care
       * about yuv (and should not have any from zs here)
       */
      else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
         switch (format_desc->format) {
         case PIPE_FORMAT_R8G8_B8G8_UNORM:
         case PIPE_FORMAT_G8R8_G8B8_UNORM:
         case PIPE_FORMAT_G8R8_B8R8_UNORM:
         case PIPE_FORMAT_R8G8_R8B8_UNORM:
         case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_R8G8Bx_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         /*
          * Note smallfloat formats usually don't need clamping
          * (they still have infinite range) however this is not
          * true for r11g11b10 and r9g9b9e5, which can't represent
          * negative numbers (and additionally r9g9b9e5 can't represent
          * very large numbers). d3d10 seems happy without clamping in
          * this case, but gl spec is pretty clear: "for floating
          * point and integer formats, border values are clamped to
          * the representable range of the format" so do that here.
          */
         case PIPE_FORMAT_R11G11B10_FLOAT:
            min_clamp = vec4_bld.zero;
            break;
         case PIPE_FORMAT_R9G9B9E5_FLOAT:
            min_clamp = vec4_bld.zero;
            max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
            break;
         default:
            assert(0);
            break;
         }
      }
   }

   /* apply whichever bounds were determined above (NULL means no clamp) */
   if (min_clamp) {
      border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
   }
   if (max_clamp) {
      border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
   }

   bld->border_color_clamped = border_color;
}


/**
 * General texture sampling codegen.
 * This function handles texture sampling for all texture targets (1D,
 * 2D, 3D, cube) and all filtering modes.
 */
static void
lp_build_sample_general(struct lp_build_sample_context *bld,
                        unsigned sampler_unit,
                        boolean is_gather,
                        LLVMValueRef *coords,
                        const LLVMValueRef *offsets,
                        LLVMValueRef lod_positive,
                        LLVMValueRef lod_fpart,
                        LLVMValueRef ilevel0,
                        LLVMValueRef ilevel1,
                        LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
   const unsigned mip_filter = sampler_state->min_mip_filter;
   const unsigned min_filter = sampler_state->min_img_filter;
   const unsigned mag_filter = sampler_state->mag_img_filter;
   LLVMValueRef texels[4];
   unsigned chan;

   /* if we need border color, (potentially) clamp it now */
   if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
                                              min_filter,
                                              mag_filter) ||
       (bld->dims > 1 &&
        lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
                                               min_filter,
                                               mag_filter)) ||
       (bld->dims > 2 &&
        lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
                                               min_filter,
                                               mag_filter))) {
      lp_build_clamp_border_color(bld, sampler_unit);
   }


   /*
    * Get/interpolate texture colors.
    */

   /* allocas so the branches below can all store into the same result vars */
   for (chan = 0; chan < 4; ++chan) {
      texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
      lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
   }

   if (min_filter == mag_filter) {
      /* no need to distinguish between minification and magnification */
      lp_build_sample_mipmap(bld, min_filter, mip_filter,
                             is_gather,
                             coords, offsets,
                             ilevel0, ilevel1, lod_fpart,
                             texels);
   }
   else {
      /*
       * Could also get rid of the if-logic and always use mipmap_both, both
       * for the single lod and multi-lod case if nothing really uses this.
       */
      if (bld->num_lods == 1) {
         /* Emit conditional to choose min image filter or mag image filter
          * depending on the lod being > 0 or <= 0, respectively.
          */
         struct lp_build_if_state if_ctx;

         /* lp_build_if wants an i1 condition */
         lod_positive = LLVMBuildTrunc(builder, lod_positive,
                                       LLVMInt1TypeInContext(bld->gallivm->context), "");

         lp_build_if(&if_ctx, bld->gallivm, lod_positive);
         {
            /* Use the minification filter */
            lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE,
                                   coords, offsets,
                                   ilevel0, ilevel1, lod_fpart,
                                   texels);
         }
         lp_build_else(&if_ctx);
         {
            /* Use the magnification filter */
            lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
                                   FALSE,
                                   coords, offsets,
                                   ilevel0, NULL, NULL,
                                   texels);
         }
         lp_build_endif(&if_ctx);
      }
      else {
         /* multiple lods: build a per-element mask of which lanes need linear */
         LLVMValueRef need_linear, linear_mask;
         unsigned mip_filter_for_nearest;
         struct lp_build_if_state if_ctx;

         if (min_filter == PIPE_TEX_FILTER_LINEAR) {
            /* minification is linear: lanes with positive lod need linear */
            linear_mask = lod_positive;
            mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
         }
         else {
            /* magnification is linear: lanes with non-positive lod need linear */
            linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
            mip_filter_for_nearest = mip_filter;
         }
         need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
                                               linear_mask);

         if (bld->num_lods != bld->coord_type.length) {
            /* widen per-quad mask to per-element for the combined path */
            linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                                bld->lodi_type,
                                                                bld->int_coord_type,
                                                                linear_mask);
         }

         lp_build_if(&if_ctx, bld->gallivm, need_linear);
         {
            /*
             * Do sampling with both filters simultaneously. This means using
             * a linear filter and doing some tricks (with weights) for the pixels
             * which need nearest filter.
             * Note that it's probably rare some pixels need nearest and some
             * linear filter but the fixups required for the nearest pixels
             * aren't all that complicated so just always run a combined path
             * if at least some pixels require linear.
             */
            lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
                                        coords, offsets,
                                        ilevel0, ilevel1,
                                        lod_fpart, lod_positive,
                                        texels);
         }
         lp_build_else(&if_ctx);
         {
            /*
             * All pixels require just nearest filtering, which is way
             * cheaper than linear, hence do a separate path for that.
             */
            lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
                                   mip_filter_for_nearest, FALSE,
                                   coords, offsets,
                                   ilevel0, ilevel1, lod_fpart,
                                   texels);
         }
         lp_build_endif(&if_ctx);
      }
   }

   /* load the final per-channel results back out of the allocas */
   for (chan = 0; chan < 4; ++chan) {
      colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
      lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
   }
}


/**
 * Texel fetch function.
 * In contrast to general sampling there is no filtering, no coord minification,
 * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
 * directly to be applied to the selected mip level (after adding texel offsets).
 * This function handles texel fetch for all targets where texel fetch is supported
 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
 */
static void
lp_build_fetch_texel(struct lp_build_sample_context *bld,
                     unsigned texture_unit,
                     const LLVMValueRef *coords,
                     LLVMValueRef explicit_lod,
                     const LLVMValueRef *offsets,
                     LLVMValueRef *colors_out)
{
   struct lp_build_context *perquadi_bld = &bld->lodi_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   unsigned dims = bld->dims, chan;
   unsigned target = bld->static_texture_state->target;
   /* always return zero for out-of-bounds fetches (d3d10 / robustness) */
   boolean out_of_bound_ret_zero = TRUE;
   LLVMValueRef size, ilevel;
   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
   LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
   LLVMValueRef width, height, depth, i, j;
   LLVMValueRef offset, out_of_bounds, out1;

   /* accumulated per-element out-of-bounds mask, starts all-in-bounds */
   out_of_bounds = int_coord_bld->zero;

   if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
      if (bld->num_mips != int_coord_bld->type.length) {
         /* reduce per-element lod to the per-quad/scalar mip representation */
         ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
                                            perquadi_bld->type, explicit_lod, 0);
      }
      else {
         ilevel = explicit_lod;
      }
      lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
                                 out_of_bound_ret_zero ? &out_of_bounds : NULL);
   }
   else {
      assert(bld->num_mips == 1);
      if (bld->static_texture_state->target != PIPE_BUFFER) {
         ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
                                                  bld->context_ptr, texture_unit);
      }
      else {
         /* buffers have no mip levels */
         ilevel = lp_build_const_int32(bld->gallivm, 0);
      }
   }
   lp_build_mipmap_level_sizes(bld, ilevel,
                               &size,
                               &row_stride_vec, &img_stride_vec);
   lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
                                size, &width, &height, &depth);

   if (target == PIPE_TEXTURE_1D_ARRAY ||
       target == PIPE_TEXTURE_2D_ARRAY) {
      if (out_of_bound_ret_zero) {
         /* don't clamp the layer; record the out-of-range lanes instead */
         z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      }
      else {
         z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL);
      }
   }

   /* This is a lot like border sampling */
   if (offsets[0]) {
      /*
       * coords are really unsigned, offsets are signed, but I don't think
       * exceeding 31 bits is possible
       */
      x = lp_build_add(int_coord_bld, x, offsets[0]);
   }
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

   if (dims >= 2) {
      if (offsets[1]) {
         y = lp_build_add(int_coord_bld, y, offsets[1]);
      }
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);

      if (dims >= 3) {
         if (offsets[2]) {
            z = lp_build_add(int_coord_bld, z, offsets[2]);
         }
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
      }
   }

   /* compute the byte offset (and subtexel coords i, j) of the texel */
   lp_build_sample_offset(int_coord_bld,
                          bld->format_desc,
                          x, y, z, row_stride_vec, img_stride_vec,
                          &offset, &i, &j);

   if (bld->static_texture_state->target != PIPE_BUFFER) {
      offset = lp_build_add(int_coord_bld, offset,
                            lp_build_get_mip_offsets(bld, ilevel));
   }

   /* force offset to 0 for out-of-bounds lanes so the load stays in-bounds */
   offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);

   lp_build_fetch_rgba_soa(bld->gallivm,
                           bld->format_desc,
                           bld->texel_type, TRUE,
                           bld->base_ptr, offset,
                           i, j,
                           bld->cache,
                           colors_out);

   if (out_of_bound_ret_zero) {
      /*
       * Only needed for ARB_robust_buffer_access_behavior and d3d10.
       * Could use min/max above instead of out-of-bounds comparisons
       * if we don't care about the result returned for out-of-bounds.
       */
      for (chan = 0; chan < 4; chan++) {
         colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
                                            bld->texel_bld.zero, colors_out[chan]);
      }
   }
}


/**
 * Just set texels to white instead of actually sampling the texture.
 * For debugging.
 */
void
lp_build_sample_nop(struct gallivm_state *gallivm,
                    struct lp_type type,
                    const LLVMValueRef *coords,
                    LLVMValueRef texel_out[4])
{
   /* note: coords is deliberately unused, kept for signature compatibility */
   LLVMValueRef one = lp_build_one(gallivm, type);
   unsigned chan;

   for (chan = 0; chan < 4; chan++) {
      texel_out[chan] = one;
   }
}


/**
 * Build the actual texture sampling code.
 * 'texel' will return a vector of four LLVMValueRefs corresponding to
 * R, G, B, A.
 * \param type  vector float type to use for coords, etc.
2453 * \param sample_key 2454 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y 2455 */ 2456 static void 2457 lp_build_sample_soa_code(struct gallivm_state *gallivm, 2458 const struct lp_static_texture_state *static_texture_state, 2459 const struct lp_static_sampler_state *static_sampler_state, 2460 struct lp_sampler_dynamic_state *dynamic_state, 2461 struct lp_type type, 2462 unsigned sample_key, 2463 unsigned texture_index, 2464 unsigned sampler_index, 2465 LLVMValueRef context_ptr, 2466 LLVMValueRef thread_data_ptr, 2467 const LLVMValueRef *coords, 2468 const LLVMValueRef *offsets, 2469 const struct lp_derivatives *derivs, /* optional */ 2470 LLVMValueRef lod, /* optional */ 2471 LLVMValueRef texel_out[4]) 2472 { 2473 unsigned target = static_texture_state->target; 2474 unsigned dims = texture_dims(target); 2475 unsigned num_quads = type.length / 4; 2476 unsigned mip_filter, min_img_filter, mag_img_filter, i; 2477 struct lp_build_sample_context bld; 2478 struct lp_static_sampler_state derived_sampler_state = *static_sampler_state; 2479 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 2480 LLVMBuilderRef builder = gallivm->builder; 2481 LLVMValueRef tex_width, newcoords[5]; 2482 enum lp_sampler_lod_property lod_property; 2483 enum lp_sampler_lod_control lod_control; 2484 enum lp_sampler_op_type op_type; 2485 LLVMValueRef lod_bias = NULL; 2486 LLVMValueRef explicit_lod = NULL; 2487 boolean op_is_tex; 2488 2489 if (0) { 2490 enum pipe_format fmt = static_texture_state->format; 2491 debug_printf("Sample from %s\n", util_format_name(fmt)); 2492 } 2493 2494 lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >> 2495 LP_SAMPLER_LOD_PROPERTY_SHIFT; 2496 lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> 2497 LP_SAMPLER_LOD_CONTROL_SHIFT; 2498 op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> 2499 LP_SAMPLER_OP_TYPE_SHIFT; 2500 2501 op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE; 2502 2503 if (lod_control == 
LP_SAMPLER_LOD_BIAS) { 2504 lod_bias = lod; 2505 assert(lod); 2506 assert(derivs == NULL); 2507 } 2508 else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) { 2509 explicit_lod = lod; 2510 assert(lod); 2511 assert(derivs == NULL); 2512 } 2513 else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { 2514 assert(derivs); 2515 assert(lod == NULL); 2516 } 2517 else { 2518 assert(derivs == NULL); 2519 assert(lod == NULL); 2520 } 2521 2522 if (static_texture_state->format == PIPE_FORMAT_NONE) { 2523 /* 2524 * If there's nothing bound, format is NONE, and we must return 2525 * all zero as mandated by d3d10 in this case. 2526 */ 2527 unsigned chan; 2528 LLVMValueRef zero = lp_build_zero(gallivm, type); 2529 for (chan = 0; chan < 4; chan++) { 2530 texel_out[chan] = zero; 2531 } 2532 return; 2533 } 2534 2535 assert(type.floating); 2536 2537 /* Setup our build context */ 2538 memset(&bld, 0, sizeof bld); 2539 bld.gallivm = gallivm; 2540 bld.context_ptr = context_ptr; 2541 bld.static_sampler_state = &derived_sampler_state; 2542 bld.static_texture_state = static_texture_state; 2543 bld.dynamic_state = dynamic_state; 2544 bld.format_desc = util_format_description(static_texture_state->format); 2545 bld.dims = dims; 2546 2547 bld.vector_width = lp_type_width(type); 2548 2549 bld.float_type = lp_type_float(32); 2550 bld.int_type = lp_type_int(32); 2551 bld.coord_type = type; 2552 bld.int_coord_type = lp_int_type(type); 2553 bld.float_size_in_type = lp_type_float(32); 2554 bld.float_size_in_type.length = dims > 1 ? 4 : 1; 2555 bld.int_size_in_type = lp_int_type(bld.float_size_in_type); 2556 bld.texel_type = type; 2557 2558 /* always using the first channel hopefully should be safe, 2559 * if not things WILL break in other places anyway. 
2560 */ 2561 if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && 2562 bld.format_desc->channel[0].pure_integer) { 2563 if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { 2564 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length); 2565 } 2566 else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2567 bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length); 2568 } 2569 } 2570 else if (util_format_has_stencil(bld.format_desc) && 2571 !util_format_has_depth(bld.format_desc)) { 2572 /* for stencil only formats, sample stencil (uint) */ 2573 bld.texel_type = lp_type_int_vec(type.width, type.width * type.length); 2574 } 2575 2576 if (!static_texture_state->level_zero_only) { 2577 derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter; 2578 } else { 2579 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 2580 } 2581 if (op_type == LP_SAMPLER_OP_GATHER) { 2582 /* 2583 * gather4 is exactly like GL_LINEAR filtering but in the end skipping 2584 * the actual filtering. Using mostly the same paths, so cube face 2585 * selection, coord wrapping etc. all naturally uses the same code. 2586 */ 2587 derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; 2588 derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR; 2589 derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR; 2590 } 2591 mip_filter = derived_sampler_state.min_mip_filter; 2592 2593 if (0) { 2594 debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter); 2595 } 2596 2597 if (static_texture_state->target == PIPE_TEXTURE_CUBE || 2598 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) 2599 { 2600 /* 2601 * Seamless filtering ignores wrap modes. 2602 * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for 2603 * bilinear it's not correct but way better than using for instance repeat. 2604 * Note we even set this for non-seamless. 
Technically GL allows any wrap 2605 * mode, which made sense when supporting true borders (can get seamless 2606 * effect with border and CLAMP_TO_BORDER), but gallium doesn't support 2607 * borders and d3d9 requires wrap modes to be ignored and it's a pain to fix 2608 * up the sampler state (as it makes it texture dependent). 2609 */ 2610 derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 2611 derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; 2612 } 2613 /* 2614 * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest, 2615 * so AoS path could be used. Not sure it's worth the trouble... 2616 */ 2617 2618 min_img_filter = derived_sampler_state.min_img_filter; 2619 mag_img_filter = derived_sampler_state.mag_img_filter; 2620 2621 2622 /* 2623 * This is all a bit complicated different paths are chosen for performance 2624 * reasons. 2625 * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for 2626 * everything (the last two options are equivalent for 4-wide case). 2627 * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad 2628 * lod is calculated then the lod value extracted afterwards so making this 2629 * case basically the same as far as lod handling is concerned for the 2630 * further sample/filter code as the 1 lod for everything case. 2631 * Different lod handling mostly shows up when building mipmap sizes 2632 * (lp_build_mipmap_level_sizes() and friends) and also in filtering 2633 * (getting the fractional part of the lod to the right texels). 2634 */ 2635 2636 /* 2637 * There are other situations where at least the multiple int lods could be 2638 * avoided like min and max lod being equal. 
2639 */ 2640 bld.num_mips = bld.num_lods = 1; 2641 2642 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) && 2643 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && 2644 (static_texture_state->target == PIPE_TEXTURE_CUBE || 2645 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && 2646 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 2647 /* 2648 * special case for using per-pixel lod even for implicit lod, 2649 * which is generally never required (ok by APIs) except to please 2650 * some (somewhat broken imho) tests (because per-pixel face selection 2651 * can cause derivatives to be different for pixels outside the primitive 2652 * due to the major axis division even if pre-project derivatives are 2653 * looking normal). 2654 */ 2655 bld.num_mips = type.length; 2656 bld.num_lods = type.length; 2657 } 2658 else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT || 2659 (explicit_lod || lod_bias || derivs)) { 2660 if ((!op_is_tex && target != PIPE_BUFFER) || 2661 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 2662 bld.num_mips = type.length; 2663 bld.num_lods = type.length; 2664 } 2665 else if (op_is_tex && min_img_filter != mag_img_filter) { 2666 bld.num_mips = 1; 2667 bld.num_lods = type.length; 2668 } 2669 } 2670 /* TODO: for true scalar_lod should only use 1 lod value */ 2671 else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) || 2672 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 2673 bld.num_mips = num_quads; 2674 bld.num_lods = num_quads; 2675 } 2676 else if (op_is_tex && min_img_filter != mag_img_filter) { 2677 bld.num_mips = 1; 2678 bld.num_lods = num_quads; 2679 } 2680 2681 2682 bld.lodf_type = type; 2683 /* we want native vector size to be able to use our intrinsics */ 2684 if (bld.num_lods != type.length) { 2685 /* TODO: this currently always has to be per-quad or per-element */ 2686 bld.lodf_type.length = type.length > 4 ? 
((type.length + 15) / 16) * 4 : 1; 2687 } 2688 bld.lodi_type = lp_int_type(bld.lodf_type); 2689 bld.levelf_type = bld.lodf_type; 2690 if (bld.num_mips == 1) { 2691 bld.levelf_type.length = 1; 2692 } 2693 bld.leveli_type = lp_int_type(bld.levelf_type); 2694 bld.float_size_type = bld.float_size_in_type; 2695 /* Note: size vectors may not be native. They contain minified w/h/d/_ values, 2696 * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */ 2697 if (bld.num_mips > 1) { 2698 bld.float_size_type.length = bld.num_mips == type.length ? 2699 bld.num_mips * bld.float_size_in_type.length : 2700 type.length; 2701 } 2702 bld.int_size_type = lp_int_type(bld.float_size_type); 2703 2704 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type); 2705 lp_build_context_init(&bld.float_vec_bld, gallivm, type); 2706 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type); 2707 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type); 2708 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type); 2709 lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type); 2710 lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type); 2711 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type); 2712 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type); 2713 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type); 2714 lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type); 2715 lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type); 2716 lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type); 2717 lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type); 2718 2719 /* Get the dynamic state */ 2720 tex_width = dynamic_state->width(dynamic_state, gallivm, 2721 context_ptr, texture_index); 2722 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, 2723 context_ptr, texture_index); 2724 bld.img_stride_array = 
dynamic_state->img_stride(dynamic_state, gallivm, 2725 context_ptr, texture_index); 2726 bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, 2727 context_ptr, texture_index); 2728 bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, 2729 context_ptr, texture_index); 2730 /* Note that mip_offsets is an array[level] of offsets to texture images */ 2731 2732 if (dynamic_state->cache_ptr && thread_data_ptr) { 2733 bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm, 2734 thread_data_ptr, texture_index); 2735 } 2736 2737 /* width, height, depth as single int vector */ 2738 if (dims <= 1) { 2739 bld.int_size = tex_width; 2740 } 2741 else { 2742 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef, 2743 tex_width, 2744 LLVMConstInt(i32t, 0, 0), ""); 2745 if (dims >= 2) { 2746 LLVMValueRef tex_height = 2747 dynamic_state->height(dynamic_state, gallivm, 2748 context_ptr, texture_index); 2749 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, 2750 tex_height, 2751 LLVMConstInt(i32t, 1, 0), ""); 2752 if (dims >= 3) { 2753 LLVMValueRef tex_depth = 2754 dynamic_state->depth(dynamic_state, gallivm, context_ptr, 2755 texture_index); 2756 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, 2757 tex_depth, 2758 LLVMConstInt(i32t, 2, 0), ""); 2759 } 2760 } 2761 } 2762 2763 for (i = 0; i < 5; i++) { 2764 newcoords[i] = coords[i]; 2765 } 2766 2767 if (0) { 2768 /* For debug: no-op texture sampling */ 2769 lp_build_sample_nop(gallivm, 2770 bld.texel_type, 2771 newcoords, 2772 texel_out); 2773 } 2774 2775 else if (op_type == LP_SAMPLER_OP_FETCH) { 2776 lp_build_fetch_texel(&bld, texture_index, newcoords, 2777 lod, offsets, 2778 texel_out); 2779 } 2780 2781 else { 2782 LLVMValueRef lod_fpart = NULL, lod_positive = NULL; 2783 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL; 2784 boolean use_aos; 2785 2786 if (util_format_is_pure_integer(static_texture_state->format) && 2787 !util_format_has_depth(bld.format_desc) && 
2788 (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR || 2789 static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR || 2790 static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) { 2791 /* 2792 * Bail if impossible filtering is specified (the awkard additional 2793 * depth check is because it is legal in gallium to have things like S8Z24 2794 * here which would say it's pure int despite such formats should sample 2795 * the depth component). 2796 * In GL such filters make the texture incomplete, this makes it robust 2797 * against state trackers which set this up regardless (we'd crash in the 2798 * lerp later (except for gather)). 2799 * Must do this after fetch_texel code since with GL state tracker we'll 2800 * get some junk sampler for buffer textures. 2801 */ 2802 unsigned chan; 2803 LLVMValueRef zero = lp_build_zero(gallivm, type); 2804 for (chan = 0; chan < 4; chan++) { 2805 texel_out[chan] = zero; 2806 } 2807 return; 2808 } 2809 2810 use_aos = util_format_fits_8unorm(bld.format_desc) && 2811 op_is_tex && 2812 /* not sure this is strictly needed or simply impossible */ 2813 derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE && 2814 lp_is_simple_wrap_mode(derived_sampler_state.wrap_s); 2815 2816 use_aos &= bld.num_lods <= num_quads || 2817 derived_sampler_state.min_img_filter == 2818 derived_sampler_state.mag_img_filter; 2819 if (dims > 1) { 2820 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t); 2821 if (dims > 2) { 2822 use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r); 2823 } 2824 } 2825 if ((static_texture_state->target == PIPE_TEXTURE_CUBE || 2826 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && 2827 derived_sampler_state.seamless_cube_map && 2828 (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR || 2829 derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) { 2830 /* theoretically possible with AoS filtering but not implemented (complex!) 
*/ 2831 use_aos = 0; 2832 } 2833 2834 if ((gallivm_debug & GALLIVM_DEBUG_PERF) && 2835 !use_aos && util_format_fits_8unorm(bld.format_desc)) { 2836 debug_printf("%s: using floating point linear filtering for %s\n", 2837 __FUNCTION__, bld.format_desc->short_name); 2838 debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d" 2839 " wraps %d wrapt %d wrapr %d\n", 2840 derived_sampler_state.min_img_filter, 2841 derived_sampler_state.mag_img_filter, 2842 derived_sampler_state.min_mip_filter, 2843 static_texture_state->target, 2844 derived_sampler_state.seamless_cube_map, 2845 derived_sampler_state.wrap_s, 2846 derived_sampler_state.wrap_t, 2847 derived_sampler_state.wrap_r); 2848 } 2849 2850 lp_build_sample_common(&bld, texture_index, sampler_index, 2851 newcoords, 2852 derivs, lod_bias, explicit_lod, 2853 &lod_positive, &lod_fpart, 2854 &ilevel0, &ilevel1); 2855 2856 if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { 2857 /* The aos path doesn't do seamless filtering so simply add cube layer 2858 * to face now. 
2859 */ 2860 newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]); 2861 } 2862 2863 /* 2864 * we only try 8-wide sampling with soa or if we have AVX2 2865 * as it appears to be a loss with just AVX) 2866 */ 2867 if (num_quads == 1 || !use_aos || 2868 (util_cpu_caps.has_avx2 && 2869 (bld.num_lods == 1 || 2870 derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) { 2871 if (use_aos) { 2872 /* do sampling/filtering with fixed pt arithmetic */ 2873 lp_build_sample_aos(&bld, sampler_index, 2874 newcoords[0], newcoords[1], 2875 newcoords[2], 2876 offsets, lod_positive, lod_fpart, 2877 ilevel0, ilevel1, 2878 texel_out); 2879 } 2880 2881 else { 2882 lp_build_sample_general(&bld, sampler_index, 2883 op_type == LP_SAMPLER_OP_GATHER, 2884 newcoords, offsets, 2885 lod_positive, lod_fpart, 2886 ilevel0, ilevel1, 2887 texel_out); 2888 } 2889 } 2890 else { 2891 unsigned j; 2892 struct lp_build_sample_context bld4; 2893 struct lp_type type4 = type; 2894 unsigned i; 2895 LLVMValueRef texelout4[4]; 2896 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16]; 2897 2898 type4.length = 4; 2899 2900 /* Setup our build context */ 2901 memset(&bld4, 0, sizeof bld4); 2902 bld4.gallivm = bld.gallivm; 2903 bld4.context_ptr = bld.context_ptr; 2904 bld4.static_texture_state = bld.static_texture_state; 2905 bld4.static_sampler_state = bld.static_sampler_state; 2906 bld4.dynamic_state = bld.dynamic_state; 2907 bld4.format_desc = bld.format_desc; 2908 bld4.dims = bld.dims; 2909 bld4.row_stride_array = bld.row_stride_array; 2910 bld4.img_stride_array = bld.img_stride_array; 2911 bld4.base_ptr = bld.base_ptr; 2912 bld4.mip_offsets = bld.mip_offsets; 2913 bld4.int_size = bld.int_size; 2914 bld4.cache = bld.cache; 2915 2916 bld4.vector_width = lp_type_width(type4); 2917 2918 bld4.float_type = lp_type_float(32); 2919 bld4.int_type = lp_type_int(32); 2920 bld4.coord_type = type4; 2921 bld4.int_coord_type = lp_int_type(type4); 2922 bld4.float_size_in_type = 
lp_type_float(32); 2923 bld4.float_size_in_type.length = dims > 1 ? 4 : 1; 2924 bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type); 2925 bld4.texel_type = bld.texel_type; 2926 bld4.texel_type.length = 4; 2927 2928 bld4.num_mips = bld4.num_lods = 1; 2929 if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) && 2930 (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && 2931 (static_texture_state->target == PIPE_TEXTURE_CUBE || 2932 static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && 2933 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 2934 bld4.num_mips = type4.length; 2935 bld4.num_lods = type4.length; 2936 } 2937 if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT && 2938 (explicit_lod || lod_bias || derivs)) { 2939 if ((!op_is_tex && target != PIPE_BUFFER) || 2940 (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { 2941 bld4.num_mips = type4.length; 2942 bld4.num_lods = type4.length; 2943 } 2944 else if (op_is_tex && min_img_filter != mag_img_filter) { 2945 bld4.num_mips = 1; 2946 bld4.num_lods = type4.length; 2947 } 2948 } 2949 2950 /* we want native vector size to be able to use our intrinsics */ 2951 bld4.lodf_type = type4; 2952 if (bld4.num_lods != type4.length) { 2953 bld4.lodf_type.length = 1; 2954 } 2955 bld4.lodi_type = lp_int_type(bld4.lodf_type); 2956 bld4.levelf_type = type4; 2957 if (bld4.num_mips != type4.length) { 2958 bld4.levelf_type.length = 1; 2959 } 2960 bld4.leveli_type = lp_int_type(bld4.levelf_type); 2961 bld4.float_size_type = bld4.float_size_in_type; 2962 if (bld4.num_mips > 1) { 2963 bld4.float_size_type.length = bld4.num_mips == type4.length ? 
2964 bld4.num_mips * bld4.float_size_in_type.length : 2965 type4.length; 2966 } 2967 bld4.int_size_type = lp_int_type(bld4.float_size_type); 2968 2969 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type); 2970 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4); 2971 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type); 2972 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type); 2973 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type); 2974 lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type); 2975 lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type); 2976 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type); 2977 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type); 2978 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type); 2979 lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type); 2980 lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type); 2981 lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type); 2982 lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type); 2983 2984 for (i = 0; i < num_quads; i++) { 2985 LLVMValueRef s4, t4, r4; 2986 LLVMValueRef lod_positive4, lod_fpart4 = NULL; 2987 LLVMValueRef ilevel04, ilevel14 = NULL; 2988 LLVMValueRef offsets4[4] = { NULL }; 2989 unsigned num_lods = bld4.num_lods; 2990 2991 s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4); 2992 t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4); 2993 r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4); 2994 2995 if (offsets[0]) { 2996 offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4); 2997 if (dims > 1) { 2998 offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4); 2999 if (dims > 2) { 3000 offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4); 3001 } 3002 } 3003 } 3004 lod_positive4 = lp_build_extract_range(gallivm, 
lod_positive, num_lods * i, num_lods); 3005 ilevel04 = bld.num_mips == 1 ? ilevel0 : 3006 lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods); 3007 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 3008 ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods); 3009 lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods); 3010 } 3011 3012 if (use_aos) { 3013 /* do sampling/filtering with fixed pt arithmetic */ 3014 lp_build_sample_aos(&bld4, sampler_index, 3015 s4, t4, r4, offsets4, 3016 lod_positive4, lod_fpart4, 3017 ilevel04, ilevel14, 3018 texelout4); 3019 } 3020 3021 else { 3022 /* this path is currently unreachable and hence might break easily... */ 3023 LLVMValueRef newcoords4[5]; 3024 newcoords4[0] = s4; 3025 newcoords4[1] = t4; 3026 newcoords4[2] = r4; 3027 newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4); 3028 newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4); 3029 3030 lp_build_sample_general(&bld4, sampler_index, 3031 op_type == LP_SAMPLER_OP_GATHER, 3032 newcoords4, offsets4, 3033 lod_positive4, lod_fpart4, 3034 ilevel04, ilevel14, 3035 texelout4); 3036 } 3037 for (j = 0; j < 4; j++) { 3038 texelouttmp[j][i] = texelout4[j]; 3039 } 3040 } 3041 3042 for (j = 0; j < 4; j++) { 3043 texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads); 3044 } 3045 } 3046 } 3047 3048 if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) { 3049 apply_sampler_swizzle(&bld, texel_out); 3050 } 3051 3052 /* 3053 * texel type can be a (32bit) int/uint (for pure int formats only), 3054 * however we are expected to always return floats (storage is untyped). 
3055 */ 3056 if (!bld.texel_type.floating) { 3057 unsigned chan; 3058 for (chan = 0; chan < 4; chan++) { 3059 texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan], 3060 lp_build_vec_type(gallivm, type), ""); 3061 } 3062 } 3063 } 3064 3065 3066 #define USE_TEX_FUNC_CALL 1 3067 3068 #define LP_MAX_TEX_FUNC_ARGS 32 3069 3070 static inline void 3071 get_target_info(enum pipe_texture_target target, 3072 unsigned *num_coords, unsigned *num_derivs, 3073 unsigned *num_offsets, unsigned *layer) 3074 { 3075 unsigned dims = texture_dims(target); 3076 *num_coords = dims; 3077 *num_offsets = dims; 3078 *num_derivs = (target == PIPE_TEXTURE_CUBE || 3079 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims; 3080 *layer = has_layer_coord(target) ? 2: 0; 3081 if (target == PIPE_TEXTURE_CUBE_ARRAY) { 3082 /* 3083 * dims doesn't include r coord for cubes - this is handled 3084 * by layer instead, but need to fix up for cube arrays... 3085 */ 3086 *layer = 3; 3087 *num_coords = 3; 3088 } 3089 } 3090 3091 3092 /** 3093 * Generate the function body for a texture sampling function. 
 */
static void
lp_build_sample_gen_func(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         struct lp_type type,
                         unsigned texture_index,
                         unsigned sampler_index,
                         LLVMValueRef function,
                         unsigned num_args,
                         unsigned sample_key)
{
   LLVMBuilderRef old_builder;
   LLVMBasicBlockRef block;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef lod = NULL;
   LLVMValueRef context_ptr;
   LLVMValueRef thread_data_ptr = NULL;
   LLVMValueRef texel_out[4];
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
   unsigned num_param = 0;
   unsigned i, num_coords, num_derivs, num_offsets, layer;
   enum lp_sampler_lod_control lod_control;
   boolean need_cache = FALSE;

   /* Decode the lod control mode out of the packed sample_key. */
   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                    LP_SAMPLER_LOD_CONTROL_SHIFT;

   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

   /*
    * A texture cache parameter is only passed for S3TC-compressed formats;
    * this must agree with the identical check in lp_build_sample_soa_func()
    * which decides whether the caller pushes the extra argument.
    */
   if (dynamic_state->cache_ptr) {
      const struct util_format_description *format_desc;
      format_desc = util_format_description(static_texture_state->format);
      if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         need_cache = TRUE;
      }
   }

   /*
    * "unpack" arguments.
    * NOTE: the order here (context ptr, [thread data], coords, [layer],
    * [shadow coord], [offsets], [lod or derivatives]) must exactly match
    * the order in which lp_build_sample_soa_func() builds the prototype
    * and pushes the call arguments.
    */
   context_ptr = LLVMGetParam(function, num_param++);
   if (need_cache) {
      thread_data_ptr = LLVMGetParam(function, num_param++);
   }
   for (i = 0; i < num_coords; i++) {
      coords[i] = LLVMGetParam(function, num_param++);
   }
   for (i = num_coords; i < 5; i++) {
      /* This is rather unfortunate...
       * (unused coord slots still need defined values downstream, so
       * fill them with undef vectors of the sample type).
       */
      coords[i] = lp_build_undef(gallivm, type);
   }
   if (layer) {
      /* layer coord overwrites one of the undef slots (index 2 or 3) */
      coords[layer] = LLVMGetParam(function, num_param++);
   }
   if (sample_key & LP_SAMPLER_SHADOW) {
      /* shadow comparison value always travels in coords[4] */
      coords[4] = LLVMGetParam(function, num_param++);
   }
   if (sample_key & LP_SAMPLER_OFFSETS) {
      for (i = 0; i < num_offsets; i++) {
         offsets[i] = LLVMGetParam(function, num_param++);
      }
   }
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      lod = LLVMGetParam(function, num_param++);
   }
   else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      for (i = 0; i < num_derivs; i++) {
         derivs.ddx[i] = LLVMGetParam(function, num_param++);
         derivs.ddy[i] = LLVMGetParam(function, num_param++);
      }
      deriv_ptr = &derivs;
   }

   /* caller-computed arg count and our unpack count must agree */
   assert(num_args == num_param);

   /*
    * Function body
    */

   /*
    * Swap in a fresh builder for the new function's entry block; the
    * caller's builder (and its insert position) is restored afterwards.
    */
   old_builder = gallivm->builder;
   block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
   gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
   LLVMPositionBuilderAtEnd(gallivm->builder, block);

   lp_build_sample_soa_code(gallivm,
                            static_texture_state,
                            static_sampler_state,
                            dynamic_state,
                            type,
                            sample_key,
                            texture_index,
                            sampler_index,
                            context_ptr,
                            thread_data_ptr,
                            coords,
                            offsets,
                            deriv_ptr,
                            lod,
                            texel_out);

   /* return the four texel channels as an aggregate (struct of 4 vectors) */
   LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);

   LLVMDisposeBuilder(gallivm->builder);
   gallivm->builder = old_builder;

   gallivm_verify_function(gallivm, function);
}


/**
 * Call the matching function for texture sampling.
 * If there's no match, generate a new one.
 */
static void
lp_build_sample_soa_func(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         const struct lp_sampler_params *params)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* module owning the function we're currently emitting into */
   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
                             LLVMGetInsertBlock(builder)));
   LLVMValueRef function, inst;
   LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
   LLVMBasicBlockRef bb;
   LLVMValueRef tex_ret;
   unsigned num_args = 0;
   char func_name[64];
   unsigned i, num_coords, num_derivs, num_offsets, layer;
   unsigned texture_index = params->texture_index;
   unsigned sampler_index = params->sampler_index;
   unsigned sample_key = params->sample_key;
   const LLVMValueRef *coords = params->coords;
   const LLVMValueRef *offsets = params->offsets;
   const struct lp_derivatives *derivs = params->derivs;
   enum lp_sampler_lod_control lod_control;
   boolean need_cache = FALSE;

   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                    LP_SAMPLER_LOD_CONTROL_SHIFT;

   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

   /* must mirror the need_cache decision in lp_build_sample_gen_func() */
   if (dynamic_state->cache_ptr) {
      const struct util_format_description *format_desc;
      format_desc = util_format_description(static_texture_state->format);
      if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         /*
          * This is not 100% correct, if we have cache but the
          * util_format_s3tc_prefer is true the cache won't get used
          * regardless (could hook up the block decode there...) */
         need_cache = TRUE;
      }
   }
   /*
    * texture function matches are found by name.
    * Thus the name has to include both the texture and sampler unit
    * (which covers all static state) plus the actual texture function
    * (including things like offsets, shadow coord, lod control).
    * Additionally lod_property has to be included too.
    */

   util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
                 texture_index, sampler_index, sample_key);

   function = LLVMGetNamedFunction(module, func_name);

   if(!function) {
      LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
      LLVMTypeRef ret_type;
      LLVMTypeRef function_type;
      LLVMTypeRef val_type[4];
      unsigned num_param = 0;

      /*
       * Generate the function prototype.
       * Argument order must match the unpack order in
       * lp_build_sample_gen_func() and the call-argument order below.
       */

      arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
      if (need_cache) {
         arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
      }
      for (i = 0; i < num_coords; i++) {
         arg_types[num_param++] = LLVMTypeOf(coords[0]);
         assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
      }
      if (layer) {
         arg_types[num_param++] = LLVMTypeOf(coords[layer]);
         assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
      }
      if (sample_key & LP_SAMPLER_SHADOW) {
         arg_types[num_param++] = LLVMTypeOf(coords[0]);
      }
      if (sample_key & LP_SAMPLER_OFFSETS) {
         for (i = 0; i < num_offsets; i++) {
            arg_types[num_param++] = LLVMTypeOf(offsets[0]);
            assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
         }
      }
      if (lod_control == LP_SAMPLER_LOD_BIAS ||
          lod_control == LP_SAMPLER_LOD_EXPLICIT) {
         arg_types[num_param++] = LLVMTypeOf(params->lod);
      }
      else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
         for (i = 0; i < num_derivs; i++) {
            arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
            arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
            assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
            assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
         }
      }

      /* return type: struct of 4 vectors, one per texel channel */
      val_type[0] = val_type[1] = val_type[2] = val_type[3] =
         lp_build_vec_type(gallivm, params->type);
      ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
      function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
      function = LLVMAddFunction(module, func_name, function_type);

      /* mark all pointer arguments noalias (attr indices are 1-based) */
      for (i = 0; i < num_param; ++i) {
         if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {

            lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
         }
      }

      /* internal fastcc: never visible outside the module */
      LLVMSetFunctionCallConv(function, LLVMFastCallConv);
      LLVMSetLinkage(function, LLVMInternalLinkage);

      lp_build_sample_gen_func(gallivm,
                               static_texture_state,
                               static_sampler_state,
                               dynamic_state,
                               params->type,
                               texture_index,
                               sampler_index,
                               function,
                               num_param,
                               sample_key);
   }

   /* push call arguments in the exact order declared above */
   num_args = 0;
   args[num_args++] = params->context_ptr;
   if (need_cache) {
      args[num_args++] = params->thread_data_ptr;
   }
   for (i = 0; i < num_coords; i++) {
      args[num_args++] = coords[i];
   }
   if (layer) {
      args[num_args++] = coords[layer];
   }
   if (sample_key & LP_SAMPLER_SHADOW) {
      args[num_args++] = coords[4];
   }
   if (sample_key & LP_SAMPLER_OFFSETS) {
      for (i = 0; i < num_offsets; i++) {
         args[num_args++] = offsets[i];
      }
   }
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      args[num_args++] = params->lod;
   }
   else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      for (i = 0; i < num_derivs; i++) {
         args[num_args++] = derivs->ddx[i];
         args[num_args++] = derivs->ddy[i];
      }
   }

   assert(num_args <= LP_MAX_TEX_FUNC_ARGS);

   tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
   /* call site convention must match the callee's fastcc */
   bb = LLVMGetInsertBlock(builder);
   inst = LLVMGetLastInstruction(bb);
   LLVMSetInstructionCallConv(inst, LLVMFastCallConv);

   /* unpack the 4-channel aggregate return into params->texel */
   for (i = 0; i < 4; i++) {
      params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
   }
}


/**
 * Build texture sampling code.
 * Either via a function call or inline it directly.
 */
void
lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
                    const struct lp_static_sampler_state *static_sampler_state,
                    struct lp_sampler_dynamic_state *dynamic_state,
                    struct gallivm_state *gallivm,
                    const struct lp_sampler_params *params)
{
   boolean use_tex_func = FALSE;

   /*
    * Do not use a function call if the sampling is "simple enough".
    * We define this by
    * a) format
    * b) no mips (either one level only or no mip filter)
    * No mips will definitely make the code smaller, though
    * the format requirement is a bit iffy - there's some (SoA) formats
    * which definitely generate less code. This does happen to catch
    * some important cases though which are hurt quite a bit by using
    * a call (though not really because of the call overhead but because
    * they are reusing the same texture unit with some of the same
    * parameters).
    * Ideally we'd let llvm recognize this stuff by doing IPO passes.
    */

   if (USE_TEX_FUNC_CALL) {
      const struct util_format_description *format_desc;
      boolean simple_format;
      boolean simple_tex;
      enum lp_sampler_op_type op_type;
      format_desc = util_format_description(static_texture_state->format);
      /* rgba8 variants take the fast AoS paths and stay inline */
      simple_format = !format_desc ||
                     (util_format_is_rgba8_variant(format_desc) &&
                      format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);

      op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
                   LP_SAMPLER_OP_TYPE_SHIFT;
      /* non-TEX ops, or TEX without mipmapping and with matching
       * min/mag filters, count as simple */
      simple_tex =
         op_type != LP_SAMPLER_OP_TEXTURE ||
           ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
             static_texture_state->level_zero_only == TRUE) &&
            static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);

      use_tex_func = format_desc && !(simple_format && simple_tex);
   }

   if (use_tex_func) {
      lp_build_sample_soa_func(gallivm,
                               static_texture_state,
                               static_sampler_state,
                               dynamic_state,
                               params);
   }
   else {
      lp_build_sample_soa_code(gallivm,
                               static_texture_state,
                               static_sampler_state,
                               dynamic_state,
                               params->type,
                               params->sample_key,
                               params->texture_index,
                               params->sampler_index,
                               params->context_ptr,
                               params->thread_data_ptr,
                               params->coords,
                               params->offsets,
                               params->derivs,
                               params->lod,
                               params->texel);
   }
}


/**
 * Build code for a texture size query (txq / resinfo style).
 * Writes up to 4 result vectors into params->sizes_out:
 * width/height/depth (minified by the requested level), the array layer
 * count when the target is an array, and - for sviewinfo queries with an
 * explicit lod - the number of mip levels in component 3.
 */
void
lp_build_size_query_soa(struct gallivm_state *gallivm,
                        const struct lp_static_texture_state *static_state,
                        struct lp_sampler_dynamic_state *dynamic_state,
                        const struct lp_sampler_size_query_params *params)
{
   LLVMValueRef lod, level, size;
   LLVMValueRef first_level = NULL;
   int dims, i;
   boolean has_array;
   /* everything below assumes a single scalar lod for the whole query */
   unsigned num_lods = 1;
   struct lp_build_context bld_int_vec4;
   LLVMValueRef context_ptr = params->context_ptr;
   unsigned texture_unit = params->texture_unit;
   unsigned target = params->target;

   if (static_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      unsigned chan;
      LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
      for (chan = 0; chan < 4; chan++) {
         params->sizes_out[chan] = zero;
      }
      return;
   }

   /*
    * Do some sanity verification about bound texture and shader dcl target.
    * Not entirely sure what's possible but assume array/non-array
    * always compatible (probably not ok for OpenGL but d3d10 has no
    * distinction of arrays at the resource level).
    * Everything else looks bogus (though not entirely sure about rect/2d).
    * Currently disabled because it causes assertion failures if there's
    * nothing bound (or rather a dummy texture, not that this case would
    * return the right values).
    */
   if (0 && static_state->target != target) {
      if (static_state->target == PIPE_TEXTURE_1D)
         assert(target == PIPE_TEXTURE_1D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
         assert(target == PIPE_TEXTURE_1D);
      else if (static_state->target == PIPE_TEXTURE_2D)
         assert(target == PIPE_TEXTURE_2D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
         assert(target == PIPE_TEXTURE_2D);
      else if (static_state->target == PIPE_TEXTURE_CUBE)
         assert(target == PIPE_TEXTURE_CUBE_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
         assert(target == PIPE_TEXTURE_CUBE);
      else
         assert(0);
   }

   dims = texture_dims(target);

   switch (target) {
   case PIPE_TEXTURE_1D_ARRAY:
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE_ARRAY:
      has_array = TRUE;
      break;
   default:
      has_array = FALSE;
      break;
   }

   assert(!params->int_type.floating);

   /* work in a fixed 4 x i32 vector regardless of the shader vector width */
   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));

   if (params->explicit_lod) {
      /* FIXME: this needs to honor per-element lod */
      lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
                                    lp_build_const_int32(gallivm, 0), "");
      /* shader lod is relative to the view's first level */
      first_level = dynamic_state->first_level(dynamic_state, gallivm,
                                               context_ptr, texture_unit);
      level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
      lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
   } else {
      lod = bld_int_vec4.zero;
   }

   size = bld_int_vec4.undef;

   /* assemble base-level w/h/d into components 0..2 of the size vector */
   size = LLVMBuildInsertElement(gallivm->builder, size,
                                 dynamic_state->width(dynamic_state, gallivm,
                                                      context_ptr, texture_unit),
                                 lp_build_const_int32(gallivm, 0), "");

   if (dims >= 2) {
      size = LLVMBuildInsertElement(gallivm->builder, size,
                                    dynamic_state->height(dynamic_state, gallivm,
                                                          context_ptr, texture_unit),
                                    lp_build_const_int32(gallivm, 1), "");
   }

   if (dims >= 3) {
      size = LLVMBuildInsertElement(gallivm->builder, size,
                                    dynamic_state->depth(dynamic_state, gallivm,
                                                         context_ptr, texture_unit),
                                    lp_build_const_int32(gallivm, 2), "");
   }

   size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);

   if (has_array) {
      /* layer count is stored in the depth slot for array targets */
      LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm,
                                                 context_ptr, texture_unit);
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /*
          * It looks like GL wants number of cubes, d3d10.1 has it undefined?
          * Could avoid this by passing in number of cubes instead of total
          * number of layers (might make things easier elsewhere too).
          */
         LLVMValueRef six = lp_build_const_int32(gallivm, 6);
         layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
      }
      size = LLVMBuildInsertElement(gallivm->builder, size, layers,
                                    lp_build_const_int32(gallivm, dims), "");
   }

   /*
    * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
    * if level is out of bounds (note this can't cover unbound texture
    * here, which also requires returning zero).
    */
   if (params->explicit_lod && params->is_sviewinfo) {
      LLVMValueRef last_level, out, out1;
      struct lp_build_context leveli_bld;

      /* everything is scalar for now */
      lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
      last_level = dynamic_state->last_level(dynamic_state, gallivm,
                                             context_ptr, texture_unit);

      /* out = (level < first) || (level > last), then mask size to zero */
      out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(&leveli_bld, out, out1);
      if (num_lods == 1) {
         out = lp_build_broadcast_scalar(&bld_int_vec4, out);
      }
      else {
         /* TODO */
         assert(0);
      }
      size = lp_build_andnot(&bld_int_vec4, size, out);
   }
   /* broadcast each used component out to the shader's vector type */
   for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
      params->sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, params->int_type,
                                                        size,
                                                        lp_build_const_int32(gallivm, i));
   }
   if (params->is_sviewinfo) {
      for (; i < 4; i++) {
         params->sizes_out[i] = lp_build_const_vec(gallivm, params->int_type, 0.0);
      }
   }

   /*
    * if there's no explicit_lod (buffers, rects) queries requiring nr of
    * mips would be illegal.
    */
   if (params->is_sviewinfo && params->explicit_lod) {
      struct lp_build_context bld_int_scalar;
      LLVMValueRef num_levels;
      lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));

      if (static_state->level_zero_only) {
         num_levels = bld_int_scalar.one;
      }
      else {
         LLVMValueRef last_level;

         last_level = dynamic_state->last_level(dynamic_state, gallivm,
                                                context_ptr, texture_unit);
         /* num_levels = last - first + 1 (first_level set above since
          * explicit_lod is non-NULL in this branch) */
         num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
         num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
      }
      params->sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, params->int_type),
                                                num_levels);
   }
}