1 /************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29 /** 30 * @file 31 * Position and shader input interpolation. 32 * 33 * @author Jose Fonseca <jfonseca (at) vmware.com> 34 */ 35 36 #include "pipe/p_shader_tokens.h" 37 #include "util/u_debug.h" 38 #include "util/u_memory.h" 39 #include "util/u_math.h" 40 #include "tgsi/tgsi_scan.h" 41 #include "gallivm/lp_bld_debug.h" 42 #include "gallivm/lp_bld_const.h" 43 #include "gallivm/lp_bld_arit.h" 44 #include "gallivm/lp_bld_swizzle.h" 45 #include "gallivm/lp_bld_flow.h" 46 #include "lp_bld_interp.h" 47 48 49 /* 50 * The shader JIT function operates on blocks of quads. 51 * Each block has 2x2 quads and each quad has 2x2 pixels. 52 * 53 * We iterate over the quads in order 0, 1, 2, 3: 54 * 55 * ################# 56 * # | # | # 57 * #---0---#---1---# 58 * # | # | # 59 * ################# 60 * # | # | # 61 * #---2---#---3---# 62 * # | # | # 63 * ################# 64 * 65 * If we iterate over multiple quads at once, quads 01 and 23 are processed 66 * together. 67 * 68 * Within each quad, we have four pixels which are represented in SOA 69 * order: 70 * 71 * ######### 72 * # 0 | 1 # 73 * #---+---# 74 * # 2 | 3 # 75 * ######### 76 * 77 * So the green channel (for example) of the four pixels is stored in 78 * a single vector register: {g0, g1, g2, g3}. 79 * The order stays the same even with multiple quads: 80 * 0 1 4 5 81 * 2 3 6 7 82 * is stored as g0..g7 83 */ 84 85 86 /** 87 * Do one perspective divide per quad. 88 * 89 * For perspective interpolation, the final attribute value is given 90 * 91 * a' = a/w = a * oow 92 * 93 * where 94 * 95 * a = a0 + dadx*x + dady*y 96 * w = w0 + dwdx*x + dwdy*y 97 * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y) 98 * 99 * Instead of computing the division per pixel, with this macro we compute the 100 * division on the upper left pixel of each quad, and use a linear 101 * approximation in the remaining pixels, given by: 102 * 103 * da'dx = (dadx - dwdx*a)*oow 104 * da'dy = (dady - dwdy*a)*oow 105 * 106 * Ironically, this actually makes things slower -- probably because the 107 * divide hardware unit is rarely used, whereas the multiply unit is typically 108 * already saturated. 109 */ 110 #define PERSPECTIVE_DIVIDE_PER_QUAD 0 111 112 113 static const unsigned char quad_offset_x[16] = {0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3}; 114 static const unsigned char quad_offset_y[16] = {0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3}; 115 116 117 static void 118 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix) 119 { 120 if(attrib == 0) 121 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix); 122 else 123 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix); 124 } 125 126 static void 127 calc_offsets(struct lp_build_context *coeff_bld, 128 unsigned quad_start_index, 129 LLVMValueRef *pixoffx, 130 LLVMValueRef *pixoffy) 131 { 132 unsigned i; 133 unsigned num_pix = coeff_bld->type.length; 134 struct gallivm_state *gallivm = coeff_bld->gallivm; 135 LLVMBuilderRef builder = coeff_bld->gallivm->builder; 136 LLVMValueRef nr, pixxf, pixyf; 137 138 *pixoffx = coeff_bld->undef; 139 *pixoffy = coeff_bld->undef; 140 141 for (i = 0; i < num_pix; i++) { 142 nr = lp_build_const_int32(gallivm, i); 143 pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] + 144 (quad_start_index & 1) * 2); 145 pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] + 146 (quad_start_index & 2)); 147 *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, ""); 148 *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, ""); 149 } 150 } 151 152 153 /* Much easier, and significantly less instructions in the per-stamp 154 * part (less than half) but overall more instructions so a loss if 155 * most quads are active. Might be a win though with larger vectors. 156 * No ability to do per-quad divide (doable but not implemented) 157 * Could be made to work with passed in pixel offsets (i.e. active quad merging). 158 */ 159 static void 160 coeffs_init_simple(struct lp_build_interp_soa_context *bld, 161 LLVMValueRef a0_ptr, 162 LLVMValueRef dadx_ptr, 163 LLVMValueRef dady_ptr) 164 { 165 struct lp_build_context *coeff_bld = &bld->coeff_bld; 166 struct lp_build_context *setup_bld = &bld->setup_bld; 167 struct gallivm_state *gallivm = coeff_bld->gallivm; 168 LLVMBuilderRef builder = gallivm->builder; 169 unsigned attrib; 170 171 for (attrib = 0; attrib < bld->num_attribs; ++attrib) { 172 /* 173 * always fetch all 4 values for performance/simplicity 174 * Note: we do that here because it seems to generate better 175 * code. It generates a lot of moves initially but less 176 * moves later. As far as I can tell this looks like a 177 * llvm issue, instead of simply reloading the values from 178 * the passed in pointers it if it runs out of registers 179 * it spills/reloads them. Maybe some optimization passes 180 * would help. 181 * Might want to investigate this again later. 182 */ 183 const unsigned interp = bld->interp[attrib]; 184 LLVMValueRef index = lp_build_const_int32(gallivm, 185 attrib * TGSI_NUM_CHANNELS); 186 LLVMValueRef ptr; 187 LLVMValueRef dadxaos = setup_bld->zero; 188 LLVMValueRef dadyaos = setup_bld->zero; 189 LLVMValueRef a0aos = setup_bld->zero; 190 191 switch (interp) { 192 case LP_INTERP_PERSPECTIVE: 193 /* fall-through */ 194 195 case LP_INTERP_LINEAR: 196 ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""); 197 ptr = LLVMBuildBitCast(builder, ptr, 198 LLVMPointerType(setup_bld->vec_type, 0), ""); 199 dadxaos = LLVMBuildLoad(builder, ptr, ""); 200 201 ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, ""); 202 ptr = LLVMBuildBitCast(builder, ptr, 203 LLVMPointerType(setup_bld->vec_type, 0), ""); 204 dadyaos = LLVMBuildLoad(builder, ptr, ""); 205 206 attrib_name(dadxaos, attrib, 0, ".dadxaos"); 207 attrib_name(dadyaos, attrib, 0, ".dadyaos"); 208 /* fall-through */ 209 210 case LP_INTERP_CONSTANT: 211 case LP_INTERP_FACING: 212 ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, ""); 213 ptr = LLVMBuildBitCast(builder, ptr, 214 LLVMPointerType(setup_bld->vec_type, 0), ""); 215 a0aos = LLVMBuildLoad(builder, ptr, ""); 216 attrib_name(a0aos, attrib, 0, ".a0aos"); 217 break; 218 219 case LP_INTERP_POSITION: 220 /* Nothing to do as the position coeffs are already setup in slot 0 */ 221 continue; 222 223 default: 224 assert(0); 225 break; 226 } 227 bld->a0aos[attrib] = a0aos; 228 bld->dadxaos[attrib] = dadxaos; 229 bld->dadyaos[attrib] = dadyaos; 230 } 231 } 232 233 /** 234 * Interpolate the shader input attribute values. 235 * This is called for each (group of) quad(s). 236 */ 237 static void 238 attribs_update_simple(struct lp_build_interp_soa_context *bld, 239 struct gallivm_state *gallivm, 240 int quad_start_index, 241 LLVMValueRef loop_iter, 242 int start, 243 int end) 244 { 245 LLVMBuilderRef builder = gallivm->builder; 246 struct lp_build_context *coeff_bld = &bld->coeff_bld; 247 struct lp_build_context *setup_bld = &bld->setup_bld; 248 LLVMValueRef oow = NULL; 249 unsigned attrib; 250 LLVMValueRef pixoffx; 251 LLVMValueRef pixoffy; 252 253 /* could do this with code-generated passed in pixel offsets too */ 254 if (bld->dynamic_offsets) { 255 LLVMValueRef ptr; 256 257 assert(loop_iter); 258 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &loop_iter, 1, ""); 259 pixoffx = LLVMBuildLoad(builder, ptr, ""); 260 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &loop_iter, 1, ""); 261 pixoffy = LLVMBuildLoad(builder, ptr, ""); 262 } 263 else { 264 calc_offsets(coeff_bld, quad_start_index, &pixoffx, &pixoffy); 265 } 266 267 pixoffx = LLVMBuildFAdd(builder, pixoffx, 268 lp_build_broadcast_scalar(coeff_bld, bld->x), ""); 269 pixoffy = LLVMBuildFAdd(builder, pixoffy, 270 lp_build_broadcast_scalar(coeff_bld, bld->y), ""); 271 272 for (attrib = start; attrib < end; attrib++) { 273 const unsigned mask = bld->mask[attrib]; 274 const unsigned interp = bld->interp[attrib]; 275 unsigned chan; 276 277 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 278 if (mask & (1 << chan)) { 279 LLVMValueRef index; 280 LLVMValueRef dadx = coeff_bld->zero; 281 LLVMValueRef dady = coeff_bld->zero; 282 LLVMValueRef a = coeff_bld->zero; 283 284 index = lp_build_const_int32(gallivm, chan); 285 switch (interp) { 286 case LP_INTERP_PERSPECTIVE: 287 /* fall-through */ 288 289 case LP_INTERP_LINEAR: 290 if (attrib == 0 && chan == 0) { 291 dadx = coeff_bld->one; 292 } 293 else if (attrib == 0 && chan == 1) { 294 dady = coeff_bld->one; 295 } 296 else { 297 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, 298 coeff_bld->type, bld->dadxaos[attrib], 299 index); 300 dady = lp_build_extract_broadcast(gallivm, setup_bld->type, 301 coeff_bld->type, bld->dadyaos[attrib], 302 index); 303 a = lp_build_extract_broadcast(gallivm, setup_bld->type, 304 coeff_bld->type, bld->a0aos[attrib], 305 index); 306 } 307 /* 308 * a = a0 + (x * dadx + y * dady) 309 */ 310 dadx = LLVMBuildFMul(builder, dadx, pixoffx, ""); 311 dady = LLVMBuildFMul(builder, dady, pixoffy, ""); 312 a = LLVMBuildFAdd(builder, a, dadx, ""); 313 a = LLVMBuildFAdd(builder, a, dady, ""); 314 315 if (interp == LP_INTERP_PERSPECTIVE) { 316 if (oow == NULL) { 317 LLVMValueRef w = bld->attribs[0][3]; 318 assert(attrib != 0); 319 assert(bld->mask[0] & TGSI_WRITEMASK_W); 320 oow = lp_build_rcp(coeff_bld, w); 321 } 322 a = lp_build_mul(coeff_bld, a, oow); 323 } 324 break; 325 326 case LP_INTERP_CONSTANT: 327 case LP_INTERP_FACING: 328 a = lp_build_extract_broadcast(gallivm, setup_bld->type, 329 coeff_bld->type, bld->a0aos[attrib], 330 index); 331 break; 332 333 case LP_INTERP_POSITION: 334 assert(attrib > 0); 335 a = bld->attribs[0][chan]; 336 break; 337 338 default: 339 assert(0); 340 break; 341 } 342 343 if ((attrib == 0) && (chan == 2)){ 344 /* FIXME: Depth values can exceed 1.0, due to the fact that 345 * setup interpolation coefficients refer to (0,0) which causes 346 * precision loss. So we must clamp to 1.0 here to avoid artifacts 347 */ 348 a = lp_build_min(coeff_bld, a, coeff_bld->one); 349 } 350 bld->attribs[attrib][chan] = a; 351 } 352 } 353 } 354 } 355 356 /** 357 * Initialize the bld->a, dadq fields. This involves fetching 358 * those values from the arrays which are passed into the JIT function. 359 */ 360 static void 361 coeffs_init(struct lp_build_interp_soa_context *bld, 362 LLVMValueRef a0_ptr, 363 LLVMValueRef dadx_ptr, 364 LLVMValueRef dady_ptr) 365 { 366 struct lp_build_context *coeff_bld = &bld->coeff_bld; 367 struct lp_build_context *setup_bld = &bld->setup_bld; 368 struct gallivm_state *gallivm = coeff_bld->gallivm; 369 LLVMBuilderRef builder = gallivm->builder; 370 LLVMValueRef pixoffx, pixoffy; 371 unsigned attrib; 372 unsigned chan; 373 unsigned i; 374 375 pixoffx = coeff_bld->undef; 376 pixoffy = coeff_bld->undef; 377 for (i = 0; i < coeff_bld->type.length; i++) { 378 LLVMValueRef nr = lp_build_const_int32(gallivm, i); 379 LLVMValueRef pixxf = lp_build_const_float(gallivm, quad_offset_x[i]); 380 LLVMValueRef pixyf = lp_build_const_float(gallivm, quad_offset_y[i]); 381 pixoffx = LLVMBuildInsertElement(builder, pixoffx, pixxf, nr, ""); 382 pixoffy = LLVMBuildInsertElement(builder, pixoffy, pixyf, nr, ""); 383 } 384 385 386 for (attrib = 0; attrib < bld->num_attribs; ++attrib) { 387 const unsigned mask = bld->mask[attrib]; 388 const unsigned interp = bld->interp[attrib]; 389 LLVMValueRef index = lp_build_const_int32(gallivm, 390 attrib * TGSI_NUM_CHANNELS); 391 LLVMValueRef ptr; 392 LLVMValueRef dadxaos = setup_bld->zero; 393 LLVMValueRef dadyaos = setup_bld->zero; 394 LLVMValueRef a0aos = setup_bld->zero; 395 396 /* always fetch all 4 values for performance/simplicity */ 397 switch (interp) { 398 case LP_INTERP_PERSPECTIVE: 399 /* fall-through */ 400 401 case LP_INTERP_LINEAR: 402 ptr = LLVMBuildGEP(builder, dadx_ptr, &index, 1, ""); 403 ptr = LLVMBuildBitCast(builder, ptr, 404 LLVMPointerType(setup_bld->vec_type, 0), ""); 405 dadxaos = LLVMBuildLoad(builder, ptr, ""); 406 407 ptr = LLVMBuildGEP(builder, dady_ptr, &index, 1, ""); 408 ptr = LLVMBuildBitCast(builder, ptr, 409 LLVMPointerType(setup_bld->vec_type, 0), ""); 410 dadyaos = LLVMBuildLoad(builder, ptr, ""); 411 412 attrib_name(dadxaos, attrib, 0, ".dadxaos"); 413 attrib_name(dadyaos, attrib, 0, ".dadyaos"); 414 /* fall-through */ 415 416 case LP_INTERP_CONSTANT: 417 case LP_INTERP_FACING: 418 ptr = LLVMBuildGEP(builder, a0_ptr, &index, 1, ""); 419 ptr = LLVMBuildBitCast(builder, ptr, 420 LLVMPointerType(setup_bld->vec_type, 0), ""); 421 a0aos = LLVMBuildLoad(builder, ptr, ""); 422 attrib_name(a0aos, attrib, 0, ".a0aos"); 423 break; 424 425 case LP_INTERP_POSITION: 426 /* Nothing to do as the position coeffs are already setup in slot 0 */ 427 continue; 428 429 default: 430 assert(0); 431 break; 432 } 433 434 /* 435 * a = a0 + (x * dadx + y * dady) 436 * a0aos is the attrib value at top left corner of stamp 437 */ 438 if (interp != LP_INTERP_CONSTANT && 439 interp != LP_INTERP_FACING) { 440 LLVMValueRef axaos, ayaos; 441 axaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->x), 442 dadxaos, ""); 443 ayaos = LLVMBuildFMul(builder, lp_build_broadcast_scalar(setup_bld, bld->y), 444 dadyaos, ""); 445 a0aos = LLVMBuildFAdd(builder, a0aos, ayaos, ""); 446 a0aos = LLVMBuildFAdd(builder, a0aos, axaos, ""); 447 } 448 449 /* 450 * dadq = {0, dadx, dady, dadx + dady} 451 * for two quads (side by side) this is: 452 * {0, dadx, dady, dadx+dady, 2*dadx, 2*dadx+dady, 3*dadx+dady} 453 */ 454 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 455 /* this generates a CRAPLOAD of shuffles... */ 456 if (mask & (1 << chan)) { 457 LLVMValueRef dadx, dady; 458 LLVMValueRef dadq, dadq2; 459 LLVMValueRef a; 460 LLVMValueRef chan_index = lp_build_const_int32(gallivm, chan); 461 462 if (attrib == 0 && chan == 0) { 463 a = lp_build_broadcast_scalar(coeff_bld, bld->x); 464 dadx = coeff_bld->one; 465 dady = coeff_bld->zero; 466 } 467 else if (attrib == 0 && chan == 1) { 468 a = lp_build_broadcast_scalar(coeff_bld, bld->y); 469 dady = coeff_bld->one; 470 dadx = coeff_bld->zero; 471 } 472 else { 473 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type, 474 coeff_bld->type, dadxaos, chan_index); 475 dady = lp_build_extract_broadcast(gallivm, setup_bld->type, 476 coeff_bld->type, dadyaos, chan_index); 477 478 /* 479 * a = {a, a, a, a} 480 */ 481 a = lp_build_extract_broadcast(gallivm, setup_bld->type, 482 coeff_bld->type, a0aos, chan_index); 483 } 484 485 dadx = LLVMBuildFMul(builder, dadx, pixoffx, ""); 486 dady = LLVMBuildFMul(builder, dady, pixoffy, ""); 487 dadq = LLVMBuildFAdd(builder, dadx, dady, ""); 488 489 /* 490 * Compute the attrib values on the upper-left corner of each 491 * group of quads. 492 * Note that if we process 2 quads at once this doesn't 493 * really exactly to what we want. 494 * We need to access elem 0 and 2 respectively later if we process 495 * 2 quads at once. 496 */ 497 498 if (interp != LP_INTERP_CONSTANT && 499 interp != LP_INTERP_FACING) { 500 dadq2 = LLVMBuildFAdd(builder, dadq, dadq, ""); 501 a = LLVMBuildFAdd(builder, a, dadq2, ""); 502 } 503 504 #if PERSPECTIVE_DIVIDE_PER_QUAD 505 /* 506 * a *= 1 / w 507 */ 508 509 /* 510 * XXX since we're only going to access elements 0,2 out of 8 511 * if we have 8-wide vectors we should do the division only 4-wide. 512 * a is really a 2-elements in a 4-wide vector disguised as 8-wide 513 * in this case. 514 */ 515 if (interp == LP_INTERP_PERSPECTIVE) { 516 LLVMValueRef w = bld->a[0][3]; 517 assert(attrib != 0); 518 assert(bld->mask[0] & TGSI_WRITEMASK_W); 519 if (!bld->oow) { 520 bld->oow = lp_build_rcp(coeff_bld, w); 521 lp_build_name(bld->oow, "oow"); 522 } 523 a = lp_build_mul(coeff_bld, a, bld->oow); 524 } 525 #endif 526 527 attrib_name(a, attrib, chan, ".a"); 528 attrib_name(dadq, attrib, chan, ".dadq"); 529 530 if (bld->dynamic_offsets) { 531 bld->a[attrib][chan] = lp_build_alloca(gallivm, 532 LLVMTypeOf(a), ""); 533 LLVMBuildStore(builder, a, bld->a[attrib][chan]); 534 } 535 else { 536 bld->a[attrib][chan] = a; 537 } 538 bld->dadq[attrib][chan] = dadq; 539 } 540 } 541 } 542 } 543 544 545 /** 546 * Increment the shader input attribute values. 547 * This is called when we move from one quad to the next. 548 */ 549 static void 550 attribs_update(struct lp_build_interp_soa_context *bld, 551 struct gallivm_state *gallivm, 552 int quad_start_index, 553 LLVMValueRef loop_iter, 554 int start, 555 int end) 556 { 557 LLVMBuilderRef builder = gallivm->builder; 558 struct lp_build_context *coeff_bld = &bld->coeff_bld; 559 LLVMValueRef shuffle = lp_build_const_int_vec(gallivm, coeff_bld->type, quad_start_index); 560 LLVMValueRef oow = NULL; 561 unsigned attrib; 562 unsigned chan; 563 564 assert(quad_start_index < 4); 565 566 for(attrib = start; attrib < end; ++attrib) { 567 const unsigned mask = bld->mask[attrib]; 568 const unsigned interp = bld->interp[attrib]; 569 for(chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 570 if(mask & (1 << chan)) { 571 LLVMValueRef a; 572 if (interp == LP_INTERP_CONSTANT || 573 interp == LP_INTERP_FACING) { 574 a = bld->a[attrib][chan]; 575 if (bld->dynamic_offsets) { 576 a = LLVMBuildLoad(builder, a, ""); 577 } 578 } 579 else if (interp == LP_INTERP_POSITION) { 580 assert(attrib > 0); 581 a = bld->attribs[0][chan]; 582 } 583 else { 584 LLVMValueRef dadq; 585 586 a = bld->a[attrib][chan]; 587 588 /* 589 * Broadcast the attribute value for this quad into all elements 590 */ 591 592 if (bld->dynamic_offsets) { 593 /* stored as vector load as float */ 594 LLVMTypeRef ptr_type = LLVMPointerType(LLVMFloatTypeInContext( 595 gallivm->context), 0); 596 LLVMValueRef ptr; 597 a = LLVMBuildBitCast(builder, a, ptr_type, ""); 598 ptr = LLVMBuildGEP(builder, a, &loop_iter, 1, ""); 599 a = LLVMBuildLoad(builder, ptr, ""); 600 a = lp_build_broadcast_scalar(&bld->coeff_bld, a); 601 } 602 else { 603 a = LLVMBuildShuffleVector(builder, 604 a, coeff_bld->undef, shuffle, ""); 605 } 606 607 /* 608 * Get the derivatives. 609 */ 610 611 dadq = bld->dadq[attrib][chan]; 612 613 #if PERSPECTIVE_DIVIDE_PER_QUAD 614 if (interp == LP_INTERP_PERSPECTIVE) { 615 LLVMValueRef dwdq = bld->dadq[0][3]; 616 617 if (oow == NULL) { 618 assert(bld->oow); 619 oow = LLVMBuildShuffleVector(coeff_bld->builder, 620 bld->oow, coeff_bld->undef, 621 shuffle, ""); 622 } 623 624 dadq = lp_build_sub(coeff_bld, 625 dadq, 626 lp_build_mul(coeff_bld, a, dwdq)); 627 dadq = lp_build_mul(coeff_bld, dadq, oow); 628 } 629 #endif 630 631 /* 632 * Add the derivatives 633 */ 634 635 a = lp_build_add(coeff_bld, a, dadq); 636 637 #if !PERSPECTIVE_DIVIDE_PER_QUAD 638 if (interp == LP_INTERP_PERSPECTIVE) { 639 if (oow == NULL) { 640 LLVMValueRef w = bld->attribs[0][3]; 641 assert(attrib != 0); 642 assert(bld->mask[0] & TGSI_WRITEMASK_W); 643 oow = lp_build_rcp(coeff_bld, w); 644 } 645 a = lp_build_mul(coeff_bld, a, oow); 646 } 647 #endif 648 649 if (attrib == 0 && chan == 2) { 650 /* FIXME: Depth values can exceed 1.0, due to the fact that 651 * setup interpolation coefficients refer to (0,0) which causes 652 * precision loss. So we must clamp to 1.0 here to avoid artifacts 653 */ 654 a = lp_build_min(coeff_bld, a, coeff_bld->one); 655 } 656 657 attrib_name(a, attrib, chan, ""); 658 } 659 bld->attribs[attrib][chan] = a; 660 } 661 } 662 } 663 } 664 665 666 /** 667 * Generate the position vectors. 668 * 669 * Parameter x0, y0 are the integer values with upper left coordinates. 670 */ 671 static void 672 pos_init(struct lp_build_interp_soa_context *bld, 673 LLVMValueRef x0, 674 LLVMValueRef y0) 675 { 676 LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder; 677 struct lp_build_context *coeff_bld = &bld->coeff_bld; 678 679 bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, ""); 680 bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, ""); 681 } 682 683 684 /** 685 * Initialize fragment shader input attribute info. 686 */ 687 void 688 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld, 689 struct gallivm_state *gallivm, 690 unsigned num_inputs, 691 const struct lp_shader_input *inputs, 692 LLVMBuilderRef builder, 693 struct lp_type type, 694 boolean dynamic_offsets, 695 LLVMValueRef a0_ptr, 696 LLVMValueRef dadx_ptr, 697 LLVMValueRef dady_ptr, 698 LLVMValueRef x0, 699 LLVMValueRef y0) 700 { 701 struct lp_type coeff_type; 702 struct lp_type setup_type; 703 unsigned attrib; 704 unsigned chan; 705 706 memset(bld, 0, sizeof *bld); 707 708 memset(&coeff_type, 0, sizeof coeff_type); 709 coeff_type.floating = TRUE; 710 coeff_type.sign = TRUE; 711 coeff_type.width = 32; 712 coeff_type.length = type.length; 713 714 memset(&setup_type, 0, sizeof setup_type); 715 setup_type.floating = TRUE; 716 setup_type.sign = TRUE; 717 setup_type.width = 32; 718 setup_type.length = TGSI_NUM_CHANNELS; 719 720 721 /* XXX: we don't support interpolating into any other types */ 722 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0); 723 724 lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type); 725 lp_build_context_init(&bld->setup_bld, gallivm, setup_type); 726 727 /* For convenience */ 728 bld->pos = bld->attribs[0]; 729 bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1]; 730 731 /* Position */ 732 bld->mask[0] = TGSI_WRITEMASK_XYZW; 733 bld->interp[0] = LP_INTERP_LINEAR; 734 735 /* Inputs */ 736 for (attrib = 0; attrib < num_inputs; ++attrib) { 737 bld->mask[1 + attrib] = inputs[attrib].usage_mask; 738 bld->interp[1 + attrib] = inputs[attrib].interp; 739 } 740 bld->num_attribs = 1 + num_inputs; 741 742 /* Ensure all masked out input channels have a valid value */ 743 for (attrib = 0; attrib < bld->num_attribs; ++attrib) { 744 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 745 bld->attribs[attrib][chan] = bld->coeff_bld.undef; 746 } 747 } 748 749 pos_init(bld, x0, y0); 750 751 if (coeff_type.length > 4) { 752 bld->simple_interp = TRUE; 753 if (dynamic_offsets) { 754 /*XXXthis should use a global static table */ 755 unsigned i; 756 unsigned num_loops = 16 / type.length; 757 LLVMValueRef pixoffx, pixoffy, index; 758 LLVMValueRef ptr; 759 760 bld->dynamic_offsets = TRUE; 761 bld->xoffset_store = lp_build_array_alloca(gallivm, 762 lp_build_vec_type(gallivm, type), 763 lp_build_const_int32(gallivm, num_loops), 764 ""); 765 bld->yoffset_store = lp_build_array_alloca(gallivm, 766 lp_build_vec_type(gallivm, type), 767 lp_build_const_int32(gallivm, num_loops), 768 ""); 769 for (i = 0; i < num_loops; i++) { 770 index = lp_build_const_int32(gallivm, i); 771 calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy); 772 ptr = LLVMBuildGEP(builder, bld->xoffset_store, &index, 1, ""); 773 LLVMBuildStore(builder, pixoffx, ptr); 774 ptr = LLVMBuildGEP(builder, bld->yoffset_store, &index, 1, ""); 775 LLVMBuildStore(builder, pixoffy, ptr); 776 } 777 } 778 coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr); 779 } 780 else { 781 bld->simple_interp = FALSE; 782 if (dynamic_offsets) { 783 bld->dynamic_offsets = TRUE; 784 } 785 coeffs_init(bld, a0_ptr, dadx_ptr, dady_ptr); 786 } 787 788 } 789 790 791 /** 792 * Advance the position and inputs to the given quad within the block. 793 */ 794 void 795 lp_build_interp_soa_update_inputs(struct lp_build_interp_soa_context *bld, 796 struct gallivm_state *gallivm, 797 int quad_start_index) 798 { 799 assert(quad_start_index < 4); 800 801 if (bld->simple_interp) { 802 attribs_update_simple(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs); 803 } 804 else { 805 attribs_update(bld, gallivm, quad_start_index, NULL, 1, bld->num_attribs); 806 } 807 } 808 809 void 810 lp_build_interp_soa_update_pos(struct lp_build_interp_soa_context *bld, 811 struct gallivm_state *gallivm, 812 int quad_start_index) 813 { 814 assert(quad_start_index < 4); 815 816 if (bld->simple_interp) { 817 attribs_update_simple(bld, gallivm, quad_start_index, NULL, 0, 1); 818 } 819 else { 820 attribs_update(bld, gallivm, quad_start_index, NULL, 0, 1); 821 } 822 } 823 824 void 825 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld, 826 struct gallivm_state *gallivm, 827 LLVMValueRef quad_start_index) 828 { 829 if (bld->simple_interp) { 830 attribs_update_simple(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs); 831 } 832 else { 833 attribs_update(bld, gallivm, 0, quad_start_index, 1, bld->num_attribs); 834 } 835 } 836 837 void 838 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld, 839 struct gallivm_state *gallivm, 840 LLVMValueRef quad_start_index) 841 { 842 if (bld->simple_interp) { 843 attribs_update_simple(bld, gallivm, 0, quad_start_index, 0, 1); 844 } 845 else { 846 attribs_update(bld, gallivm, 0, quad_start_index, 0, 1); 847 } 848 } 849 850