1 /************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * @file 30 * Texture sampling -- SoA. 31 * 32 * @author Jose Fonseca <jfonseca (at) vmware.com> 33 * @author Brian Paul <brianp (at) vmware.com> 34 */ 35 36 #include "pipe/p_defines.h" 37 #include "pipe/p_state.h" 38 #include "pipe/p_shader_tokens.h" 39 #include "util/u_debug.h" 40 #include "util/u_dump.h" 41 #include "util/u_memory.h" 42 #include "util/u_math.h" 43 #include "util/u_format.h" 44 #include "util/u_cpu_detect.h" 45 #include "lp_bld_debug.h" 46 #include "lp_bld_type.h" 47 #include "lp_bld_const.h" 48 #include "lp_bld_conv.h" 49 #include "lp_bld_arit.h" 50 #include "lp_bld_bitarit.h" 51 #include "lp_bld_logic.h" 52 #include "lp_bld_printf.h" 53 #include "lp_bld_swizzle.h" 54 #include "lp_bld_flow.h" 55 #include "lp_bld_gather.h" 56 #include "lp_bld_format.h" 57 #include "lp_bld_sample.h" 58 #include "lp_bld_sample_aos.h" 59 #include "lp_bld_struct.h" 60 #include "lp_bld_quad.h" 61 #include "lp_bld_pack.h" 62 63 64 /** 65 * Generate code to fetch a texel from a texture at int coords (x, y, z). 66 * The computation depends on whether the texture is 1D, 2D or 3D. 67 * The result, texel, will be float vectors: 68 * texel[0] = red values 69 * texel[1] = green values 70 * texel[2] = blue values 71 * texel[3] = alpha values 72 */ 73 static void 74 lp_build_sample_texel_soa(struct lp_build_sample_context *bld, 75 unsigned unit, 76 LLVMValueRef width, 77 LLVMValueRef height, 78 LLVMValueRef depth, 79 LLVMValueRef x, 80 LLVMValueRef y, 81 LLVMValueRef z, 82 LLVMValueRef y_stride, 83 LLVMValueRef z_stride, 84 LLVMValueRef data_ptr, 85 LLVMValueRef texel_out[4]) 86 { 87 const struct lp_sampler_static_state *static_state = bld->static_state; 88 const unsigned dims = bld->dims; 89 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 90 LLVMBuilderRef builder = bld->gallivm->builder; 91 LLVMValueRef offset; 92 LLVMValueRef i, j; 93 LLVMValueRef use_border = NULL; 94 95 /* use_border = x < 0 || x >= width || y < 0 || y >= height */ 96 if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s, 97 static_state->min_img_filter, 98 static_state->mag_img_filter)) { 99 LLVMValueRef b1, b2; 100 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); 101 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); 102 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); 103 } 104 105 if (dims >= 2 && 106 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t, 107 static_state->min_img_filter, 108 static_state->mag_img_filter)) { 109 LLVMValueRef b1, b2; 110 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); 111 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); 112 if (use_border) { 113 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); 114 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); 115 } 116 else { 117 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); 118 } 119 } 120 121 if (dims == 3 && 122 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r, 123 static_state->min_img_filter, 124 static_state->mag_img_filter)) { 125 LLVMValueRef b1, b2; 126 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); 127 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); 128 if (use_border) { 129 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); 130 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); 131 } 132 else { 133 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); 134 } 135 } 136 137 /* convert x,y,z coords to linear offset from start of texture, in bytes */ 138 lp_build_sample_offset(&bld->int_coord_bld, 139 bld->format_desc, 140 x, y, z, y_stride, z_stride, 141 &offset, &i, &j); 142 143 if (use_border) { 144 /* If we can sample the border color, it means that texcoords may 145 * lie outside the bounds of the texture image. We need to do 146 * something to prevent reading out of bounds and causing a segfault. 147 * 148 * Simply AND the texture coords with !use_border. This will cause 149 * coords which are out of bounds to become zero. Zero's guaranteed 150 * to be inside the texture image. 151 */ 152 offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border); 153 } 154 155 lp_build_fetch_rgba_soa(bld->gallivm, 156 bld->format_desc, 157 bld->texel_type, 158 data_ptr, offset, 159 i, j, 160 texel_out); 161 162 /* 163 * Note: if we find an app which frequently samples the texture border 164 * we might want to implement a true conditional here to avoid sampling 165 * the texture whenever possible (since that's quite a bit of code). 166 * Ex: 167 * if (use_border) { 168 * texel = border_color; 169 * } 170 * else { 171 * texel = sample_texture(coord); 172 * } 173 * As it is now, we always sample the texture, then selectively replace 174 * the texel color results with the border color. 175 */ 176 177 if (use_border) { 178 /* select texel color or border color depending on use_border */ 179 LLVMValueRef border_color_ptr = 180 bld->dynamic_state->border_color(bld->dynamic_state, 181 bld->gallivm, unit); 182 int chan; 183 for (chan = 0; chan < 4; chan++) { 184 LLVMValueRef border_chan = 185 lp_build_array_get(bld->gallivm, border_color_ptr, 186 lp_build_const_int32(bld->gallivm, chan)); 187 LLVMValueRef border_chan_vec = 188 lp_build_broadcast_scalar(&bld->float_vec_bld, border_chan); 189 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border, 190 border_chan_vec, texel_out[chan]); 191 } 192 } 193 } 194 195 196 /** 197 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes. 198 */ 199 static LLVMValueRef 200 lp_build_coord_mirror(struct lp_build_sample_context *bld, 201 LLVMValueRef coord) 202 { 203 struct lp_build_context *coord_bld = &bld->coord_bld; 204 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 205 LLVMValueRef fract, flr, isOdd; 206 207 lp_build_ifloor_fract(coord_bld, coord, &flr, &fract); 208 209 /* isOdd = flr & 1 */ 210 isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, ""); 211 212 /* make coord positive or negative depending on isOdd */ 213 coord = lp_build_set_sign(coord_bld, fract, isOdd); 214 215 /* convert isOdd to float */ 216 isOdd = lp_build_int_to_float(coord_bld, isOdd); 217 218 /* add isOdd to coord */ 219 coord = lp_build_add(coord_bld, coord, isOdd); 220 221 return coord; 222 } 223 224 225 /** 226 * Helper to compute the first coord and the weight for 227 * linear wrap repeat npot textures 228 */ 229 void 230 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld, 231 LLVMValueRef coord_f, 232 LLVMValueRef length_i, 233 LLVMValueRef length_f, 234 LLVMValueRef *coord0_i, 235 LLVMValueRef *weight_f) 236 { 237 struct lp_build_context *coord_bld = &bld->coord_bld; 238 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 239 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); 240 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i, 241 int_coord_bld->one); 242 LLVMValueRef mask; 243 /* wrap with normalized floats is just fract */ 244 coord_f = lp_build_fract(coord_bld, coord_f); 245 /* mul by size and subtract 0.5 */ 246 coord_f = lp_build_mul(coord_bld, coord_f, length_f); 247 coord_f = lp_build_sub(coord_bld, coord_f, half); 248 /* 249 * we avoided the 0.5/length division before the repeat wrap, 250 * now need to fix up edge cases with selects 251 */ 252 /* convert to int, compute lerp weight */ 253 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f); 254 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, 255 PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero); 256 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i); 257 } 258 259 260 /** 261 * Build LLVM code for texture wrap mode for linear filtering. 262 * \param x0_out returns first integer texcoord 263 * \param x1_out returns second integer texcoord 264 * \param weight_out returns linear interpolation weight 265 */ 266 static void 267 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, 268 LLVMValueRef coord, 269 LLVMValueRef length, 270 LLVMValueRef length_f, 271 boolean is_pot, 272 unsigned wrap_mode, 273 LLVMValueRef *x0_out, 274 LLVMValueRef *x1_out, 275 LLVMValueRef *weight_out) 276 { 277 struct lp_build_context *coord_bld = &bld->coord_bld; 278 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 279 LLVMBuilderRef builder = bld->gallivm->builder; 280 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); 281 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); 282 LLVMValueRef coord0, coord1, weight; 283 284 switch(wrap_mode) { 285 case PIPE_TEX_WRAP_REPEAT: 286 if (is_pot) { 287 /* mul by size and subtract 0.5 */ 288 coord = lp_build_mul(coord_bld, coord, length_f); 289 coord = lp_build_sub(coord_bld, coord, half); 290 /* convert to int, compute lerp weight */ 291 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 292 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 293 /* repeat wrap */ 294 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); 295 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, ""); 296 } 297 else { 298 LLVMValueRef mask; 299 lp_build_coord_repeat_npot_linear(bld, coord, 300 length, length_f, 301 &coord0, &weight); 302 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, 303 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); 304 coord1 = LLVMBuildAnd(builder, 305 lp_build_add(int_coord_bld, coord0, int_coord_bld->one), 306 mask, ""); 307 } 308 break; 309 310 case PIPE_TEX_WRAP_CLAMP: 311 if (bld->static_state->normalized_coords) { 312 /* scale coord to length */ 313 coord = lp_build_mul(coord_bld, coord, length_f); 314 } 315 316 /* clamp to [0, length] */ 317 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f); 318 319 coord = lp_build_sub(coord_bld, coord, half); 320 321 /* convert to int, compute lerp weight */ 322 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 323 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 324 break; 325 326 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 327 { 328 struct lp_build_context abs_coord_bld = bld->coord_bld; 329 abs_coord_bld.type.sign = FALSE; 330 331 if (bld->static_state->normalized_coords) { 332 /* mul by tex size */ 333 coord = lp_build_mul(coord_bld, coord, length_f); 334 } 335 /* clamp to length max */ 336 coord = lp_build_min(coord_bld, coord, length_f); 337 /* subtract 0.5 */ 338 coord = lp_build_sub(coord_bld, coord, half); 339 /* clamp to [0, length - 0.5] */ 340 coord = lp_build_max(coord_bld, coord, coord_bld->zero); 341 /* convert to int, compute lerp weight */ 342 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); 343 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 344 /* coord1 = min(coord1, length-1) */ 345 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 346 break; 347 } 348 349 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 350 { 351 LLVMValueRef min; 352 if (bld->static_state->normalized_coords) { 353 /* scale coord to length */ 354 coord = lp_build_mul(coord_bld, coord, length_f); 355 } 356 /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */ 357 coord = lp_build_sub(coord_bld, coord, half); 358 min = lp_build_const_vec(bld->gallivm, coord_bld->type, -1.0F); 359 coord = lp_build_clamp(coord_bld, coord, min, length_f); 360 /* convert to int, compute lerp weight */ 361 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 362 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 363 } 364 break; 365 366 case PIPE_TEX_WRAP_MIRROR_REPEAT: 367 /* compute mirror function */ 368 coord = lp_build_coord_mirror(bld, coord); 369 370 /* scale coord to length */ 371 coord = lp_build_mul(coord_bld, coord, length_f); 372 coord = lp_build_sub(coord_bld, coord, half); 373 374 /* convert to int, compute lerp weight */ 375 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 376 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 377 378 /* coord0 = max(coord0, 0) */ 379 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); 380 /* coord1 = min(coord1, length-1) */ 381 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); 382 break; 383 384 case PIPE_TEX_WRAP_MIRROR_CLAMP: 385 coord = lp_build_abs(coord_bld, coord); 386 387 if (bld->static_state->normalized_coords) { 388 /* scale coord to length */ 389 coord = lp_build_mul(coord_bld, coord, length_f); 390 } 391 392 /* clamp to [0, length] */ 393 coord = lp_build_min(coord_bld, coord, length_f); 394 395 coord = lp_build_sub(coord_bld, coord, half); 396 397 /* convert to int, compute lerp weight */ 398 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 399 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 400 break; 401 402 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 403 { 404 LLVMValueRef min, max; 405 struct lp_build_context abs_coord_bld = bld->coord_bld; 406 abs_coord_bld.type.sign = FALSE; 407 coord = lp_build_abs(coord_bld, coord); 408 409 if (bld->static_state->normalized_coords) { 410 /* scale coord to length */ 411 coord = lp_build_mul(coord_bld, coord, length_f); 412 } 413 414 /* clamp to [0.5, length - 0.5] */ 415 min = half; 416 max = lp_build_sub(coord_bld, length_f, min); 417 coord = lp_build_clamp(coord_bld, coord, min, max); 418 419 coord = lp_build_sub(coord_bld, coord, half); 420 421 /* convert to int, compute lerp weight */ 422 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); 423 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 424 } 425 break; 426 427 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 428 { 429 coord = lp_build_abs(coord_bld, coord); 430 431 if (bld->static_state->normalized_coords) { 432 /* scale coord to length */ 433 coord = lp_build_mul(coord_bld, coord, length_f); 434 } 435 436 /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */ 437 /* skip -0.5 clamp (always positive), do sub first */ 438 coord = lp_build_sub(coord_bld, coord, half); 439 coord = lp_build_min(coord_bld, coord, length_f); 440 441 /* convert to int, compute lerp weight */ 442 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); 443 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); 444 } 445 break; 446 447 default: 448 assert(0); 449 coord0 = NULL; 450 coord1 = NULL; 451 weight = NULL; 452 } 453 454 *x0_out = coord0; 455 *x1_out = coord1; 456 *weight_out = weight; 457 } 458 459 460 /** 461 * Build LLVM code for texture wrap mode for nearest filtering. 462 * \param coord the incoming texcoord (nominally in [0,1]) 463 * \param length the texture size along one dimension, as int vector 464 * \param is_pot if TRUE, length is a power of two 465 * \param wrap_mode one of PIPE_TEX_WRAP_x 466 */ 467 static LLVMValueRef 468 lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, 469 LLVMValueRef coord, 470 LLVMValueRef length, 471 LLVMValueRef length_f, 472 boolean is_pot, 473 unsigned wrap_mode) 474 { 475 struct lp_build_context *coord_bld = &bld->coord_bld; 476 struct lp_build_context *int_coord_bld = &bld->int_coord_bld; 477 LLVMBuilderRef builder = bld->gallivm->builder; 478 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); 479 LLVMValueRef icoord; 480 481 switch(wrap_mode) { 482 case PIPE_TEX_WRAP_REPEAT: 483 if (is_pot) { 484 coord = lp_build_mul(coord_bld, coord, length_f); 485 icoord = lp_build_ifloor(coord_bld, coord); 486 icoord = LLVMBuildAnd(builder, icoord, length_minus_one, ""); 487 } 488 else { 489 /* take fraction, unnormalize */ 490 coord = lp_build_fract_safe(coord_bld, coord); 491 coord = lp_build_mul(coord_bld, coord, length_f); 492 icoord = lp_build_itrunc(coord_bld, coord); 493 } 494 break; 495 496 case PIPE_TEX_WRAP_CLAMP: 497 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 498 if (bld->static_state->normalized_coords) { 499 /* scale coord to length */ 500 coord = lp_build_mul(coord_bld, coord, length_f); 501 } 502 503 /* floor */ 504 /* use itrunc instead since we clamp to 0 anyway */ 505 icoord = lp_build_itrunc(coord_bld, coord); 506 507 /* clamp to [0, length - 1]. */ 508 icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, 509 length_minus_one); 510 break; 511 512 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 513 /* Note: this is the same as CLAMP_TO_EDGE, except min = -1 */ 514 { 515 LLVMValueRef min, max; 516 517 if (bld->static_state->normalized_coords) { 518 /* scale coord to length */ 519 coord = lp_build_mul(coord_bld, coord, length_f); 520 } 521 522 icoord = lp_build_ifloor(coord_bld, coord); 523 524 /* clamp to [-1, length] */ 525 min = lp_build_negate(int_coord_bld, int_coord_bld->one); 526 max = length; 527 icoord = lp_build_clamp(int_coord_bld, icoord, min, max); 528 } 529 break; 530 531 case PIPE_TEX_WRAP_MIRROR_REPEAT: 532 /* compute mirror function */ 533 coord = lp_build_coord_mirror(bld, coord); 534 535 /* scale coord to length */ 536 assert(bld->static_state->normalized_coords); 537 coord = lp_build_mul(coord_bld, coord, length_f); 538 539 /* itrunc == ifloor here */ 540 icoord = lp_build_itrunc(coord_bld, coord); 541 542 /* clamp to [0, length - 1] */ 543 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); 544 break; 545 546 case PIPE_TEX_WRAP_MIRROR_CLAMP: 547 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 548 coord = lp_build_abs(coord_bld, coord); 549 550 if (bld->static_state->normalized_coords) { 551 /* scale coord to length */ 552 coord = lp_build_mul(coord_bld, coord, length_f); 553 } 554 555 /* itrunc == ifloor here */ 556 icoord = lp_build_itrunc(coord_bld, coord); 557 558 /* clamp to [0, length - 1] */ 559 icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); 560 break; 561 562 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 563 coord = lp_build_abs(coord_bld, coord); 564 565 if (bld->static_state->normalized_coords) { 566 /* scale coord to length */ 567 coord = lp_build_mul(coord_bld, coord, length_f); 568 } 569 570 /* itrunc == ifloor here */ 571 icoord = lp_build_itrunc(coord_bld, coord); 572 573 /* clamp to [0, length] */ 574 icoord = lp_build_min(int_coord_bld, icoord, length); 575 break; 576 577 default: 578 assert(0); 579 icoord = NULL; 580 } 581 582 return icoord; 583 } 584 585 586 /** 587 * Generate code to sample a mipmap level with nearest filtering. 588 * If sampling a cube texture, r = cube face in [0,5]. 589 */ 590 static void 591 lp_build_sample_image_nearest(struct lp_build_sample_context *bld, 592 unsigned unit, 593 LLVMValueRef size, 594 LLVMValueRef row_stride_vec, 595 LLVMValueRef img_stride_vec, 596 LLVMValueRef data_ptr, 597 LLVMValueRef s, 598 LLVMValueRef t, 599 LLVMValueRef r, 600 LLVMValueRef colors_out[4]) 601 { 602 const unsigned dims = bld->dims; 603 LLVMValueRef width_vec; 604 LLVMValueRef height_vec; 605 LLVMValueRef depth_vec; 606 LLVMValueRef flt_size; 607 LLVMValueRef flt_width_vec; 608 LLVMValueRef flt_height_vec; 609 LLVMValueRef flt_depth_vec; 610 LLVMValueRef x, y, z; 611 612 lp_build_extract_image_sizes(bld, 613 bld->int_size_type, 614 bld->int_coord_type, 615 size, 616 &width_vec, &height_vec, &depth_vec); 617 618 flt_size = lp_build_int_to_float(&bld->float_size_bld, size); 619 620 lp_build_extract_image_sizes(bld, 621 bld->float_size_type, 622 bld->coord_type, 623 flt_size, 624 &flt_width_vec, &flt_height_vec, &flt_depth_vec); 625 626 /* 627 * Compute integer texcoords. 628 */ 629 x = lp_build_sample_wrap_nearest(bld, s, width_vec, flt_width_vec, 630 bld->static_state->pot_width, 631 bld->static_state->wrap_s); 632 lp_build_name(x, "tex.x.wrapped"); 633 634 if (dims >= 2) { 635 y = lp_build_sample_wrap_nearest(bld, t, height_vec, flt_height_vec, 636 bld->static_state->pot_height, 637 bld->static_state->wrap_t); 638 lp_build_name(y, "tex.y.wrapped"); 639 640 if (dims == 3) { 641 z = lp_build_sample_wrap_nearest(bld, r, depth_vec, flt_depth_vec, 642 bld->static_state->pot_depth, 643 bld->static_state->wrap_r); 644 lp_build_name(z, "tex.z.wrapped"); 645 } 646 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 647 z = r; 648 } 649 else { 650 z = NULL; 651 } 652 } 653 else { 654 y = z = NULL; 655 } 656 657 /* 658 * Get texture colors. 659 */ 660 lp_build_sample_texel_soa(bld, unit, 661 width_vec, height_vec, depth_vec, 662 x, y, z, 663 row_stride_vec, img_stride_vec, 664 data_ptr, colors_out); 665 } 666 667 668 /** 669 * Generate code to sample a mipmap level with linear filtering. 670 * If sampling a cube texture, r = cube face in [0,5]. 671 */ 672 static void 673 lp_build_sample_image_linear(struct lp_build_sample_context *bld, 674 unsigned unit, 675 LLVMValueRef size, 676 LLVMValueRef row_stride_vec, 677 LLVMValueRef img_stride_vec, 678 LLVMValueRef data_ptr, 679 LLVMValueRef s, 680 LLVMValueRef t, 681 LLVMValueRef r, 682 LLVMValueRef colors_out[4]) 683 { 684 const unsigned dims = bld->dims; 685 LLVMValueRef width_vec; 686 LLVMValueRef height_vec; 687 LLVMValueRef depth_vec; 688 LLVMValueRef flt_size; 689 LLVMValueRef flt_width_vec; 690 LLVMValueRef flt_height_vec; 691 LLVMValueRef flt_depth_vec; 692 LLVMValueRef x0, y0, z0, x1, y1, z1; 693 LLVMValueRef s_fpart, t_fpart, r_fpart; 694 LLVMValueRef neighbors[2][2][4]; 695 int chan; 696 697 lp_build_extract_image_sizes(bld, 698 bld->int_size_type, 699 bld->int_coord_type, 700 size, 701 &width_vec, &height_vec, &depth_vec); 702 703 flt_size = lp_build_int_to_float(&bld->float_size_bld, size); 704 705 lp_build_extract_image_sizes(bld, 706 bld->float_size_type, 707 bld->coord_type, 708 flt_size, 709 &flt_width_vec, &flt_height_vec, &flt_depth_vec); 710 711 /* 712 * Compute integer texcoords. 713 */ 714 lp_build_sample_wrap_linear(bld, s, width_vec, flt_width_vec, 715 bld->static_state->pot_width, 716 bld->static_state->wrap_s, 717 &x0, &x1, &s_fpart); 718 lp_build_name(x0, "tex.x0.wrapped"); 719 lp_build_name(x1, "tex.x1.wrapped"); 720 721 if (dims >= 2) { 722 lp_build_sample_wrap_linear(bld, t, height_vec, flt_height_vec, 723 bld->static_state->pot_height, 724 bld->static_state->wrap_t, 725 &y0, &y1, &t_fpart); 726 lp_build_name(y0, "tex.y0.wrapped"); 727 lp_build_name(y1, "tex.y1.wrapped"); 728 729 if (dims == 3) { 730 lp_build_sample_wrap_linear(bld, r, depth_vec, flt_depth_vec, 731 bld->static_state->pot_depth, 732 bld->static_state->wrap_r, 733 &z0, &z1, &r_fpart); 734 lp_build_name(z0, "tex.z0.wrapped"); 735 lp_build_name(z1, "tex.z1.wrapped"); 736 } 737 else if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 738 z0 = z1 = r; /* cube face */ 739 r_fpart = NULL; 740 } 741 else { 742 z0 = z1 = NULL; 743 r_fpart = NULL; 744 } 745 } 746 else { 747 y0 = y1 = t_fpart = NULL; 748 z0 = z1 = r_fpart = NULL; 749 } 750 751 /* 752 * Get texture colors. 753 */ 754 /* get x0/x1 texels */ 755 lp_build_sample_texel_soa(bld, unit, 756 width_vec, height_vec, depth_vec, 757 x0, y0, z0, 758 row_stride_vec, img_stride_vec, 759 data_ptr, neighbors[0][0]); 760 lp_build_sample_texel_soa(bld, unit, 761 width_vec, height_vec, depth_vec, 762 x1, y0, z0, 763 row_stride_vec, img_stride_vec, 764 data_ptr, neighbors[0][1]); 765 766 if (dims == 1) { 767 /* Interpolate two samples from 1D image to produce one color */ 768 for (chan = 0; chan < 4; chan++) { 769 colors_out[chan] = lp_build_lerp(&bld->texel_bld, s_fpart, 770 neighbors[0][0][chan], 771 neighbors[0][1][chan]); 772 } 773 } 774 else { 775 /* 2D/3D texture */ 776 LLVMValueRef colors0[4]; 777 778 /* get x0/x1 texels at y1 */ 779 lp_build_sample_texel_soa(bld, unit, 780 width_vec, height_vec, depth_vec, 781 x0, y1, z0, 782 row_stride_vec, img_stride_vec, 783 data_ptr, neighbors[1][0]); 784 lp_build_sample_texel_soa(bld, unit, 785 width_vec, height_vec, depth_vec, 786 x1, y1, z0, 787 row_stride_vec, img_stride_vec, 788 data_ptr, neighbors[1][1]); 789 790 /* Bilinear interpolate the four samples from the 2D image / 3D slice */ 791 for (chan = 0; chan < 4; chan++) { 792 colors0[chan] = lp_build_lerp_2d(&bld->texel_bld, 793 s_fpart, t_fpart, 794 neighbors[0][0][chan], 795 neighbors[0][1][chan], 796 neighbors[1][0][chan], 797 neighbors[1][1][chan]); 798 } 799 800 if (dims == 3) { 801 LLVMValueRef neighbors1[2][2][4]; 802 LLVMValueRef colors1[4]; 803 804 /* get x0/x1/y0/y1 texels at z1 */ 805 lp_build_sample_texel_soa(bld, unit, 806 width_vec, height_vec, depth_vec, 807 x0, y0, z1, 808 row_stride_vec, img_stride_vec, 809 data_ptr, neighbors1[0][0]); 810 lp_build_sample_texel_soa(bld, unit, 811 width_vec, height_vec, depth_vec, 812 x1, y0, z1, 813 row_stride_vec, img_stride_vec, 814 data_ptr, neighbors1[0][1]); 815 lp_build_sample_texel_soa(bld, unit, 816 width_vec, height_vec, depth_vec, 817 x0, y1, z1, 818 row_stride_vec, img_stride_vec, 819 data_ptr, neighbors1[1][0]); 820 lp_build_sample_texel_soa(bld, unit, 821 width_vec, height_vec, depth_vec, 822 x1, y1, z1, 823 row_stride_vec, img_stride_vec, 824 data_ptr, neighbors1[1][1]); 825 826 /* Bilinear interpolate the four samples from the second Z slice */ 827 for (chan = 0; chan < 4; chan++) { 828 colors1[chan] = lp_build_lerp_2d(&bld->texel_bld, 829 s_fpart, t_fpart, 830 neighbors1[0][0][chan], 831 neighbors1[0][1][chan], 832 neighbors1[1][0][chan], 833 neighbors1[1][1][chan]); 834 } 835 836 /* Linearly interpolate the two samples from the two 3D slices */ 837 for (chan = 0; chan < 4; chan++) { 838 colors_out[chan] = lp_build_lerp(&bld->texel_bld, 839 r_fpart, 840 colors0[chan], colors1[chan]); 841 } 842 } 843 else { 844 /* 2D tex */ 845 for (chan = 0; chan < 4; chan++) { 846 colors_out[chan] = colors0[chan]; 847 } 848 } 849 } 850 } 851 852 853 /** 854 * Sample the texture/mipmap using given image filter and mip filter. 855 * data0_ptr and data1_ptr point to the two mipmap levels to sample 856 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes. 857 * If we're using nearest miplevel sampling the '1' values will be null/unused. 858 */ 859 static void 860 lp_build_sample_mipmap(struct lp_build_sample_context *bld, 861 unsigned unit, 862 unsigned img_filter, 863 unsigned mip_filter, 864 LLVMValueRef s, 865 LLVMValueRef t, 866 LLVMValueRef r, 867 LLVMValueRef ilevel0, 868 LLVMValueRef ilevel1, 869 LLVMValueRef lod_fpart, 870 LLVMValueRef *colors_out) 871 { 872 LLVMBuilderRef builder = bld->gallivm->builder; 873 LLVMValueRef size0 = NULL; 874 LLVMValueRef size1 = NULL; 875 LLVMValueRef row_stride0_vec = NULL; 876 LLVMValueRef row_stride1_vec = NULL; 877 LLVMValueRef img_stride0_vec = NULL; 878 LLVMValueRef img_stride1_vec = NULL; 879 LLVMValueRef data_ptr0 = NULL; 880 LLVMValueRef data_ptr1 = NULL; 881 LLVMValueRef colors0[4], colors1[4]; 882 unsigned chan; 883 884 /* sample the first mipmap level */ 885 lp_build_mipmap_level_sizes(bld, ilevel0, 886 &size0, 887 &row_stride0_vec, &img_stride0_vec); 888 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); 889 if (img_filter == PIPE_TEX_FILTER_NEAREST) { 890 lp_build_sample_image_nearest(bld, unit, 891 size0, 892 row_stride0_vec, img_stride0_vec, 893 data_ptr0, s, t, r, 894 colors0); 895 } 896 else { 897 assert(img_filter == PIPE_TEX_FILTER_LINEAR); 898 lp_build_sample_image_linear(bld, unit, 899 size0, 900 row_stride0_vec, img_stride0_vec, 901 data_ptr0, s, t, r, 902 colors0); 903 } 904 905 /* Store the first level's colors in the output variables */ 906 for (chan = 0; chan < 4; chan++) { 907 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 908 } 909 910 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 911 struct lp_build_if_state if_ctx; 912 LLVMValueRef need_lerp; 913 unsigned num_quads = bld->coord_bld.type.length / 4; 914 915 /* need_lerp = lod_fpart > 0 */ 916 if (num_quads == 1) { 917 need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, 918 lod_fpart, bld->perquadf_bld.zero, 919 "need_lerp"); 920 } 921 else { 922 /* 923 * We'll do mip filtering if any of the quads need it. 924 * It might be better to split the vectors here and only fetch/filter 925 * quads which need it. 926 */ 927 /* 928 * We unfortunately need to clamp lod_fpart here since we can get 929 * negative values which would screw up filtering if not all 930 * lod_fpart values have same sign. 931 */ 932 lod_fpart = lp_build_max(&bld->perquadf_bld, lod_fpart, 933 bld->perquadf_bld.zero); 934 need_lerp = lp_build_compare(bld->gallivm, bld->perquadf_bld.type, 935 PIPE_FUNC_GREATER, 936 lod_fpart, bld->perquadf_bld.zero); 937 need_lerp = lp_build_any_true_range(&bld->perquadi_bld, num_quads, need_lerp); 938 } 939 940 lp_build_if(&if_ctx, bld->gallivm, need_lerp); 941 { 942 /* sample the second mipmap level */ 943 lp_build_mipmap_level_sizes(bld, ilevel1, 944 &size1, 945 &row_stride1_vec, &img_stride1_vec); 946 data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); 947 if (img_filter == PIPE_TEX_FILTER_NEAREST) { 948 lp_build_sample_image_nearest(bld, unit, 949 size1, 950 row_stride1_vec, img_stride1_vec, 951 data_ptr1, s, t, r, 952 colors1); 953 } 954 else { 955 lp_build_sample_image_linear(bld, unit, 956 size1, 957 row_stride1_vec, img_stride1_vec, 958 data_ptr1, s, t, r, 959 colors1); 960 } 961 962 /* interpolate samples from the two mipmap levels */ 963 964 lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, 965 bld->perquadf_bld.type, 966 bld->texel_bld.type, 967 lod_fpart); 968 969 for (chan = 0; chan < 4; chan++) { 970 colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, 971 colors0[chan], colors1[chan]); 972 LLVMBuildStore(builder, colors0[chan], colors_out[chan]); 973 } 974 } 975 lp_build_endif(&if_ctx); 976 } 977 } 978 979 /** 980 * Calculate cube face, lod, mip levels. 981 */ 982 static void 983 lp_build_sample_common(struct lp_build_sample_context *bld, 984 unsigned unit, 985 LLVMValueRef *s, 986 LLVMValueRef *t, 987 LLVMValueRef *r, 988 const struct lp_derivatives *derivs, 989 LLVMValueRef lod_bias, /* optional */ 990 LLVMValueRef explicit_lod, /* optional */ 991 LLVMValueRef *lod_ipart, 992 LLVMValueRef *lod_fpart, 993 LLVMValueRef *ilevel0, 994 LLVMValueRef *ilevel1) 995 { 996 const unsigned mip_filter = bld->static_state->min_mip_filter; 997 const unsigned min_filter = bld->static_state->min_img_filter; 998 const unsigned mag_filter = bld->static_state->mag_img_filter; 999 LLVMValueRef first_level; 1000 struct lp_derivatives face_derivs; 1001 1002 /* 1003 printf("%s mip %d min %d mag %d\n", __FUNCTION__, 1004 mip_filter, min_filter, mag_filter); 1005 */ 1006 1007 /* 1008 * Choose cube face, recompute texcoords and derivatives for the chosen face. 1009 */ 1010 if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 1011 LLVMValueRef face, face_s, face_t; 1012 lp_build_cube_lookup(bld, *s, *t, *r, &face, &face_s, &face_t); 1013 *s = face_s; /* vec */ 1014 *t = face_t; /* vec */ 1015 /* use 'r' to indicate cube face */ 1016 *r = face; /* vec */ 1017 1018 /* recompute ddx, ddy using the new (s,t) face texcoords */ 1019 face_derivs.ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, *s, *t); 1020 face_derivs.ddx_ddy[1] = NULL; 1021 derivs = &face_derivs; 1022 } 1023 1024 /* 1025 * Compute the level of detail (float). 1026 */ 1027 if (min_filter != mag_filter || 1028 mip_filter != PIPE_TEX_MIPFILTER_NONE) { 1029 /* Need to compute lod either to choose mipmap levels or to 1030 * distinguish between minification/magnification with one mipmap level. 1031 */ 1032 lp_build_lod_selector(bld, unit, derivs, 1033 lod_bias, explicit_lod, 1034 mip_filter, 1035 lod_ipart, lod_fpart); 1036 } else { 1037 *lod_ipart = bld->perquadi_bld.zero; 1038 } 1039 1040 /* 1041 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 1042 */ 1043 switch (mip_filter) { 1044 default: 1045 assert(0 && "bad mip_filter value in lp_build_sample_soa()"); 1046 /* fall-through */ 1047 case PIPE_TEX_MIPFILTER_NONE: 1048 /* always use mip level 0 */ 1049 if (bld->static_state->target == PIPE_TEXTURE_CUBE) { 1050 /* XXX this is a work-around for an apparent bug in LLVM 2.7. 1051 * We should be able to set ilevel0 = const(0) but that causes 1052 * bad x86 code to be emitted. 1053 * XXX should probably disable that on other llvm versions. 1054 */ 1055 assert(*lod_ipart); 1056 lp_build_nearest_mip_level(bld, unit, *lod_ipart, ilevel0); 1057 } 1058 else { 1059 first_level = bld->dynamic_state->first_level(bld->dynamic_state, 1060 bld->gallivm, unit); 1061 first_level = lp_build_broadcast_scalar(&bld->perquadi_bld, first_level); 1062 *ilevel0 = first_level; 1063 } 1064 break; 1065 case PIPE_TEX_MIPFILTER_NEAREST: 1066 assert(*lod_ipart); 1067 lp_build_nearest_mip_level(bld, unit, *lod_ipart, ilevel0); 1068 break; 1069 case PIPE_TEX_MIPFILTER_LINEAR: 1070 assert(*lod_ipart); 1071 assert(*lod_fpart); 1072 lp_build_linear_mip_levels(bld, unit, 1073 *lod_ipart, lod_fpart, 1074 ilevel0, ilevel1); 1075 break; 1076 } 1077 } 1078 1079 /** 1080 * General texture sampling codegen. 1081 * This function handles texture sampling for all texture targets (1D, 1082 * 2D, 3D, cube) and all filtering modes. 1083 */ 1084 static void 1085 lp_build_sample_general(struct lp_build_sample_context *bld, 1086 unsigned unit, 1087 LLVMValueRef s, 1088 LLVMValueRef t, 1089 LLVMValueRef r, 1090 LLVMValueRef lod_ipart, 1091 LLVMValueRef lod_fpart, 1092 LLVMValueRef ilevel0, 1093 LLVMValueRef ilevel1, 1094 LLVMValueRef *colors_out) 1095 { 1096 struct lp_build_context *int_bld = &bld->int_bld; 1097 LLVMBuilderRef builder = bld->gallivm->builder; 1098 const unsigned mip_filter = bld->static_state->min_mip_filter; 1099 const unsigned min_filter = bld->static_state->min_img_filter; 1100 const unsigned mag_filter = bld->static_state->mag_img_filter; 1101 LLVMValueRef texels[4]; 1102 unsigned chan; 1103 1104 /* 1105 * Get/interpolate texture colors. 1106 */ 1107 1108 for (chan = 0; chan < 4; ++chan) { 1109 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, ""); 1110 lp_build_name(texels[chan], "sampler%u_texel_%c_var", unit, "xyzw"[chan]); 1111 } 1112 1113 if (min_filter == mag_filter) { 1114 /* no need to distinguish between minification and magnification */ 1115 lp_build_sample_mipmap(bld, unit, 1116 min_filter, mip_filter, 1117 s, t, r, 1118 ilevel0, ilevel1, lod_fpart, 1119 texels); 1120 } 1121 else { 1122 /* Emit conditional to choose min image filter or mag image filter 1123 * depending on the lod being > 0 or <= 0, respectively. 1124 */ 1125 struct lp_build_if_state if_ctx; 1126 LLVMValueRef minify; 1127 1128 /* minify = lod >= 0.0 */ 1129 minify = LLVMBuildICmp(builder, LLVMIntSGE, 1130 lod_ipart, int_bld->zero, ""); 1131 1132 lp_build_if(&if_ctx, bld->gallivm, minify); 1133 { 1134 /* Use the minification filter */ 1135 lp_build_sample_mipmap(bld, unit, 1136 min_filter, mip_filter, 1137 s, t, r, 1138 ilevel0, ilevel1, lod_fpart, 1139 texels); 1140 } 1141 lp_build_else(&if_ctx); 1142 { 1143 /* Use the magnification filter */ 1144 lp_build_sample_mipmap(bld, unit, 1145 mag_filter, PIPE_TEX_MIPFILTER_NONE, 1146 s, t, r, 1147 ilevel0, NULL, NULL, 1148 texels); 1149 } 1150 lp_build_endif(&if_ctx); 1151 } 1152 1153 for (chan = 0; chan < 4; ++chan) { 1154 colors_out[chan] = LLVMBuildLoad(builder, texels[chan], ""); 1155 lp_build_name(colors_out[chan], "sampler%u_texel_%c", unit, "xyzw"[chan]); 1156 } 1157 } 1158 1159 1160 /** 1161 * Do shadow test/comparison. 1162 * \param p the texcoord Z (aka R, aka P) component 1163 * \param texel the texel to compare against (use the X channel) 1164 */ 1165 static void 1166 lp_build_sample_compare(struct lp_build_sample_context *bld, 1167 LLVMValueRef p, 1168 LLVMValueRef texel[4]) 1169 { 1170 struct lp_build_context *texel_bld = &bld->texel_bld; 1171 LLVMBuilderRef builder = bld->gallivm->builder; 1172 LLVMValueRef res; 1173 const unsigned chan = 0; 1174 1175 if (bld->static_state->compare_mode == PIPE_TEX_COMPARE_NONE) 1176 return; 1177 1178 /* debug code */ 1179 if (0) { 1180 LLVMValueRef indx = lp_build_const_int32(bld->gallivm, 0); 1181 LLVMValueRef coord = LLVMBuildExtractElement(builder, p, indx, ""); 1182 LLVMValueRef tex = LLVMBuildExtractElement(builder, texel[chan], indx, ""); 1183 lp_build_printf(bld->gallivm, "shadow compare coord %f to texture %f\n", 1184 coord, tex); 1185 } 1186 1187 /* Clamp p coords to [0,1] */ 1188 p = lp_build_clamp(&bld->coord_bld, p, 1189 bld->coord_bld.zero, 1190 bld->coord_bld.one); 1191 1192 /* result = (p FUNC texel) ? 1 : 0 */ 1193 res = lp_build_cmp(texel_bld, bld->static_state->compare_func, 1194 p, texel[chan]); 1195 res = lp_build_select(texel_bld, res, texel_bld->one, texel_bld->zero); 1196 1197 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */ 1198 texel[0] = 1199 texel[1] = 1200 texel[2] = res; 1201 texel[3] = texel_bld->one; 1202 } 1203 1204 1205 /** 1206 * Just set texels to white instead of actually sampling the texture. 1207 * For debugging. 1208 */ 1209 void 1210 lp_build_sample_nop(struct gallivm_state *gallivm, 1211 struct lp_type type, 1212 unsigned num_coords, 1213 const LLVMValueRef *coords, 1214 LLVMValueRef texel_out[4]) 1215 { 1216 LLVMValueRef one = lp_build_one(gallivm, type); 1217 unsigned chan; 1218 1219 for (chan = 0; chan < 4; chan++) { 1220 texel_out[chan] = one; 1221 } 1222 } 1223 1224 1225 /** 1226 * Build texture sampling code. 1227 * 'texel' will return a vector of four LLVMValueRefs corresponding to 1228 * R, G, B, A. 1229 * \param type vector float type to use for coords, etc. 1230 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y 1231 */ 1232 void 1233 lp_build_sample_soa(struct gallivm_state *gallivm, 1234 const struct lp_sampler_static_state *static_state, 1235 struct lp_sampler_dynamic_state *dynamic_state, 1236 struct lp_type type, 1237 unsigned unit, 1238 unsigned num_coords, 1239 const LLVMValueRef *coords, 1240 const struct lp_derivatives *derivs, 1241 LLVMValueRef lod_bias, /* optional */ 1242 LLVMValueRef explicit_lod, /* optional */ 1243 LLVMValueRef texel_out[4]) 1244 { 1245 unsigned dims = texture_dims(static_state->target); 1246 struct lp_build_sample_context bld; 1247 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); 1248 LLVMBuilderRef builder = gallivm->builder; 1249 LLVMValueRef tex_width, tex_height, tex_depth; 1250 LLVMValueRef s; 1251 LLVMValueRef t; 1252 LLVMValueRef r; 1253 1254 if (0) { 1255 enum pipe_format fmt = static_state->format; 1256 debug_printf("Sample from %s\n", util_format_name(fmt)); 1257 } 1258 1259 assert(type.floating); 1260 1261 /* Setup our build context */ 1262 memset(&bld, 0, sizeof bld); 1263 bld.gallivm = gallivm; 1264 bld.static_state = static_state; 1265 bld.dynamic_state = dynamic_state; 1266 bld.format_desc = util_format_description(static_state->format); 1267 bld.dims = dims; 1268 1269 bld.vector_width = lp_type_width(type); 1270 1271 bld.float_type = lp_type_float(32); 1272 bld.int_type = lp_type_int(32); 1273 bld.coord_type = type; 1274 bld.int_coord_type = lp_int_type(type); 1275 bld.float_size_type = lp_type_float(32); 1276 bld.float_size_type.length = dims > 1 ? 4 : 1; 1277 bld.int_size_type = lp_int_type(bld.float_size_type); 1278 bld.texel_type = type; 1279 bld.perquadf_type = type; 1280 /* we want native vector size to be able to use our intrinsics */ 1281 bld.perquadf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1; 1282 bld.perquadi_type = lp_int_type(bld.perquadf_type); 1283 1284 lp_build_context_init(&bld.float_bld, gallivm, bld.float_type); 1285 lp_build_context_init(&bld.float_vec_bld, gallivm, type); 1286 lp_build_context_init(&bld.int_bld, gallivm, bld.int_type); 1287 lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type); 1288 lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type); 1289 lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type); 1290 lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type); 1291 lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type); 1292 lp_build_context_init(&bld.perquadf_bld, gallivm, bld.perquadf_type); 1293 lp_build_context_init(&bld.perquadi_bld, gallivm, bld.perquadi_type); 1294 1295 /* Get the dynamic state */ 1296 tex_width = dynamic_state->width(dynamic_state, gallivm, unit); 1297 tex_height = dynamic_state->height(dynamic_state, gallivm, unit); 1298 tex_depth = dynamic_state->depth(dynamic_state, gallivm, unit); 1299 bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, unit); 1300 bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, unit); 1301 bld.data_array = dynamic_state->data_ptr(dynamic_state, gallivm, unit); 1302 /* Note that data_array is an array[level] of pointers to texture images */ 1303 1304 s = coords[0]; 1305 t = coords[1]; 1306 r = coords[2]; 1307 1308 /* width, height, depth as single int vector */ 1309 if (dims <= 1) { 1310 bld.int_size = tex_width; 1311 } 1312 else { 1313 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_bld.undef, 1314 tex_width, LLVMConstInt(i32t, 0, 0), ""); 1315 if (dims >= 2) { 1316 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, 1317 tex_height, LLVMConstInt(i32t, 1, 0), ""); 1318 if (dims >= 3) { 1319 bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, 1320 tex_depth, LLVMConstInt(i32t, 2, 0), ""); 1321 } 1322 } 1323 } 1324 1325 if (0) { 1326 /* For debug: no-op texture sampling */ 1327 lp_build_sample_nop(gallivm, 1328 bld.texel_type, 1329 num_coords, 1330 coords, 1331 texel_out); 1332 } 1333 else { 1334 LLVMValueRef lod_ipart = NULL, lod_fpart = NULL; 1335 LLVMValueRef ilevel0 = NULL, ilevel1 = NULL; 1336 unsigned num_quads = type.length / 4; 1337 const unsigned mip_filter = bld.static_state->min_mip_filter; 1338 boolean use_aos = util_format_fits_8unorm(bld.format_desc) && 1339 lp_is_simple_wrap_mode(static_state->wrap_s) && 1340 lp_is_simple_wrap_mode(static_state->wrap_t); 1341 1342 if ((gallivm_debug & GALLIVM_DEBUG_PERF) && 1343 !use_aos && util_format_fits_8unorm(bld.format_desc)) { 1344 debug_printf("%s: using floating point linear filtering for %s\n", 1345 __FUNCTION__, bld.format_desc->short_name); 1346 debug_printf(" min_img %d mag_img %d mip %d wraps %d wrapt %d\n", 1347 static_state->min_img_filter, 1348 static_state->mag_img_filter, 1349 static_state->min_mip_filter, 1350 static_state->wrap_s, 1351 static_state->wrap_t); 1352 } 1353 1354 lp_build_sample_common(&bld, unit, 1355 &s, &t, &r, 1356 derivs, lod_bias, explicit_lod, 1357 &lod_ipart, &lod_fpart, 1358 &ilevel0, &ilevel1); 1359 1360 /* 1361 * we only try 8-wide sampling with soa as it appears to 1362 * be a loss with aos with AVX. 1363 */ 1364 if (num_quads == 1 || (mip_filter == PIPE_TEX_MIPFILTER_NONE && 1365 !use_aos)) { 1366 1367 if (num_quads > 1) { 1368 LLVMValueRef index0 = lp_build_const_int32(gallivm, 0); 1369 /* These parameters are the same for all quads */ 1370 lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, index0, ""); 1371 ilevel0 = LLVMBuildExtractElement(builder, ilevel0, index0, ""); 1372 } 1373 if (use_aos) { 1374 /* do sampling/filtering with fixed pt arithmetic */ 1375 lp_build_sample_aos(&bld, unit, 1376 s, t, r, 1377 lod_ipart, lod_fpart, 1378 ilevel0, ilevel1, 1379 texel_out); 1380 } 1381 1382 else { 1383 lp_build_sample_general(&bld, unit, 1384 s, t, r, 1385 lod_ipart, lod_fpart, 1386 ilevel0, ilevel1, 1387 texel_out); 1388 } 1389 } 1390 else { 1391 struct lp_build_if_state if_ctx; 1392 LLVMValueRef notsame_levels, notsame; 1393 LLVMValueRef index0 = lp_build_const_int32(gallivm, 0); 1394 LLVMValueRef texels[4]; 1395 LLVMValueRef texelout[4]; 1396 unsigned j; 1397 1398 texels[0] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texr"); 1399 texels[1] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texg"); 1400 texels[2] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texb"); 1401 texels[3] = lp_build_alloca(gallivm, bld.texel_bld.vec_type, "texa"); 1402 1403 /* only build the if if we MAY split, otherwise always split */ 1404 if (!use_aos) { 1405 notsame = lp_build_extract_broadcast(gallivm, 1406 bld.perquadi_bld.type, 1407 bld.perquadi_bld.type, 1408 ilevel0, index0); 1409 notsame = lp_build_sub(&bld.perquadi_bld, ilevel0, notsame); 1410 notsame_levels = lp_build_any_true_range(&bld.perquadi_bld, num_quads, 1411 notsame); 1412 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1413 notsame = lp_build_extract_broadcast(gallivm, 1414 bld.perquadi_bld.type, 1415 bld.perquadi_bld.type, 1416 ilevel1, index0); 1417 notsame = lp_build_sub(&bld.perquadi_bld, ilevel1, notsame); 1418 notsame = lp_build_any_true_range(&bld.perquadi_bld, num_quads, notsame); 1419 notsame_levels = LLVMBuildOr(builder, notsame_levels, notsame, ""); 1420 } 1421 lp_build_if(&if_ctx, gallivm, notsame_levels); 1422 } 1423 1424 { 1425 struct lp_build_sample_context bld4; 1426 struct lp_type type4 = type; 1427 unsigned i; 1428 LLVMValueRef texelout4[4]; 1429 LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16]; 1430 1431 type4.length = 4; 1432 1433 /* Setup our build context */ 1434 memset(&bld4, 0, sizeof bld4); 1435 bld4.gallivm = bld.gallivm; 1436 bld4.static_state = bld.static_state; 1437 bld4.dynamic_state = bld.dynamic_state; 1438 bld4.format_desc = bld.format_desc; 1439 bld4.dims = bld.dims; 1440 bld4.row_stride_array = bld.row_stride_array; 1441 bld4.img_stride_array = bld.img_stride_array; 1442 bld4.data_array = bld.data_array; 1443 bld4.int_size = bld.int_size; 1444 1445 bld4.vector_width = lp_type_width(type4); 1446 1447 bld4.float_type = lp_type_float(32); 1448 bld4.int_type = lp_type_int(32); 1449 bld4.coord_type = type4; 1450 bld4.int_coord_type = lp_int_type(type4); 1451 bld4.float_size_type = lp_type_float(32); 1452 bld4.float_size_type.length = dims > 1 ? 4 : 1; 1453 bld4.int_size_type = lp_int_type(bld4.float_size_type); 1454 bld4.texel_type = type4; 1455 bld4.perquadf_type = type4; 1456 /* we want native vector size to be able to use our intrinsics */ 1457 bld4.perquadf_type.length = 1; 1458 bld4.perquadi_type = lp_int_type(bld4.perquadf_type); 1459 1460 lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type); 1461 lp_build_context_init(&bld4.float_vec_bld, gallivm, type4); 1462 lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type); 1463 lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type); 1464 lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type); 1465 lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type); 1466 lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type); 1467 lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type); 1468 lp_build_context_init(&bld4.perquadf_bld, gallivm, bld4.perquadf_type); 1469 lp_build_context_init(&bld4.perquadi_bld, gallivm, bld4.perquadi_type); 1470 1471 for (i = 0; i < num_quads; i++) { 1472 LLVMValueRef s4, t4, r4; 1473 LLVMValueRef lod_iparts, lod_fparts = NULL; 1474 LLVMValueRef ilevel0s, ilevel1s = NULL; 1475 LLVMValueRef indexi = lp_build_const_int32(gallivm, i); 1476 1477 s4 = lp_build_extract_range(gallivm, s, 4*i, 4); 1478 t4 = lp_build_extract_range(gallivm, t, 4*i, 4); 1479 r4 = lp_build_extract_range(gallivm, r, 4*i, 4); 1480 lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, indexi, ""); 1481 ilevel0s = LLVMBuildExtractElement(builder, ilevel0, indexi, ""); 1482 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1483 ilevel1s = LLVMBuildExtractElement(builder, ilevel1, indexi, ""); 1484 lod_fparts = LLVMBuildExtractElement(builder, lod_fpart, indexi, ""); 1485 } 1486 1487 if (use_aos) { 1488 /* do sampling/filtering with fixed pt arithmetic */ 1489 lp_build_sample_aos(&bld4, unit, 1490 s4, t4, r4, 1491 lod_iparts, lod_fparts, 1492 ilevel0s, ilevel1s, 1493 texelout4); 1494 } 1495 1496 else { 1497 lp_build_sample_general(&bld4, unit, 1498 s4, t4, r4, 1499 lod_iparts, lod_fparts, 1500 ilevel0s, ilevel1s, 1501 texelout4); 1502 } 1503 for (j = 0; j < 4; j++) { 1504 texelouttmp[j][i] = texelout4[j]; 1505 } 1506 } 1507 for (j = 0; j < 4; j++) { 1508 texelout[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads); 1509 LLVMBuildStore(builder, texelout[j], texels[j]); 1510 } 1511 } 1512 if (!use_aos) { 1513 LLVMValueRef ilevel0s, lod_iparts, ilevel1s = NULL; 1514 1515 lp_build_else(&if_ctx); 1516 1517 /* These parameters are the same for all quads */ 1518 lod_iparts = LLVMBuildExtractElement(builder, lod_ipart, index0, ""); 1519 ilevel0s = LLVMBuildExtractElement(builder, ilevel0, index0, ""); 1520 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { 1521 ilevel1s = LLVMBuildExtractElement(builder, ilevel1, index0, ""); 1522 } 1523 1524 if (use_aos) { 1525 /* do sampling/filtering with fixed pt arithmetic */ 1526 lp_build_sample_aos(&bld, unit, 1527 s, t, r, 1528 lod_iparts, lod_fpart, 1529 ilevel0s, ilevel1s, 1530 texelout); 1531 } 1532 1533 else { 1534 lp_build_sample_general(&bld, unit, 1535 s, t, r, 1536 lod_iparts, lod_fpart, 1537 ilevel0s, ilevel1s, 1538 texelout); 1539 } 1540 for (j = 0; j < 4; j++) { 1541 LLVMBuildStore(builder, texelout[j], texels[j]); 1542 } 1543 1544 lp_build_endif(&if_ctx); 1545 } 1546 1547 for (j = 0; j < 4; j++) { 1548 texel_out[j] = LLVMBuildLoad(builder, texels[j], ""); 1549 } 1550 } 1551 } 1552 1553 lp_build_sample_compare(&bld, r, texel_out); 1554 1555 apply_sampler_swizzle(&bld, texel_out); 1556 } 1557 1558 void 1559 lp_build_size_query_soa(struct gallivm_state *gallivm, 1560 const struct lp_sampler_static_state *static_state, 1561 struct lp_sampler_dynamic_state *dynamic_state, 1562 struct lp_type int_type, 1563 unsigned unit, 1564 LLVMValueRef explicit_lod, 1565 LLVMValueRef *sizes_out) 1566 { 1567 LLVMValueRef lod; 1568 LLVMValueRef size; 1569 int dims, i; 1570 struct lp_build_context bld_int_vec; 1571 1572 switch (static_state->target) { 1573 case PIPE_TEXTURE_1D: 1574 case PIPE_BUFFER: 1575 dims = 1; 1576 break; 1577 case PIPE_TEXTURE_2D: 1578 case PIPE_TEXTURE_CUBE: 1579 case PIPE_TEXTURE_RECT: 1580 dims = 2; 1581 break; 1582 case PIPE_TEXTURE_3D: 1583 dims = 3; 1584 break; 1585 1586 default: 1587 assert(0); 1588 return; 1589 } 1590 1591 assert(!int_type.floating); 1592 1593 lp_build_context_init(&bld_int_vec, gallivm, lp_type_int_vec(32, 128)); 1594 1595 if (explicit_lod) { 1596 LLVMValueRef first_level; 1597 lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, lp_build_const_int32(gallivm, 0), ""); 1598 first_level = dynamic_state->first_level(dynamic_state, gallivm, unit); 1599 lod = lp_build_broadcast_scalar(&bld_int_vec, 1600 LLVMBuildAdd(gallivm->builder, lod, first_level, "lod")); 1601 1602 } else { 1603 lod = bld_int_vec.zero; 1604 } 1605 1606 size = bld_int_vec.undef; 1607 1608 size = LLVMBuildInsertElement(gallivm->builder, size, 1609 dynamic_state->width(dynamic_state, gallivm, unit), 1610 lp_build_const_int32(gallivm, 0), ""); 1611 1612 if (dims >= 2) { 1613 size = LLVMBuildInsertElement(gallivm->builder, size, 1614 dynamic_state->height(dynamic_state, gallivm, unit), 1615 lp_build_const_int32(gallivm, 1), ""); 1616 } 1617 1618 if (dims >= 3) { 1619 size = LLVMBuildInsertElement(gallivm->builder, size, 1620 dynamic_state->depth(dynamic_state, gallivm, unit), 1621 lp_build_const_int32(gallivm, 2), ""); 1622 } 1623 1624 size = lp_build_minify(&bld_int_vec, size, lod); 1625 1626 for (i=0; i < dims; i++) { 1627 sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec.type, int_type, 1628 size, 1629 lp_build_const_int32(gallivm, i)); 1630 } 1631 } 1632