1 /************************************************************************** 2 * 3 * Copyright 2010 VMware. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29 #include "util/u_math.h" 30 #include "util/u_memory.h" 31 #include "util/simple_list.h" 32 #include "util/os_time.h" 33 #include "gallivm/lp_bld_arit.h" 34 #include "gallivm/lp_bld_bitarit.h" 35 #include "gallivm/lp_bld_const.h" 36 #include "gallivm/lp_bld_debug.h" 37 #include "gallivm/lp_bld_init.h" 38 #include "gallivm/lp_bld_logic.h" 39 #include "gallivm/lp_bld_intr.h" 40 #include "gallivm/lp_bld_flow.h" 41 #include "gallivm/lp_bld_type.h" 42 43 #include "lp_perf.h" 44 #include "lp_debug.h" 45 #include "lp_flush.h" 46 #include "lp_screen.h" 47 #include "lp_context.h" 48 #include "lp_state.h" 49 #include "lp_state_fs.h" 50 #include "lp_state_setup.h" 51 52 53 /** Setup shader number (for debugging) */ 54 static unsigned setup_no = 0; 55 56 57 /* currently organized to interpolate full float[4] attributes even 58 * when some elements are unused. Later, can pack vertex data more 59 * closely. 60 */ 61 62 63 struct lp_setup_args 64 { 65 /* Function arguments: 66 */ 67 LLVMValueRef v0; 68 LLVMValueRef v1; 69 LLVMValueRef v2; 70 LLVMValueRef facing; /* boolean */ 71 LLVMValueRef a0; 72 LLVMValueRef dadx; 73 LLVMValueRef dady; 74 75 /* Derived: 76 */ 77 LLVMValueRef x0_center; 78 LLVMValueRef y0_center; 79 LLVMValueRef dy20_ooa; 80 LLVMValueRef dy01_ooa; 81 LLVMValueRef dx20_ooa; 82 LLVMValueRef dx01_ooa; 83 struct lp_build_context bld; 84 }; 85 86 87 static void 88 store_coef(struct gallivm_state *gallivm, 89 struct lp_setup_args *args, 90 unsigned slot, 91 LLVMValueRef a0, 92 LLVMValueRef dadx, 93 LLVMValueRef dady) 94 { 95 LLVMBuilderRef builder = gallivm->builder; 96 LLVMValueRef idx = lp_build_const_int32(gallivm, slot); 97 98 LLVMBuildStore(builder, 99 a0, 100 LLVMBuildGEP(builder, args->a0, &idx, 1, "")); 101 102 LLVMBuildStore(builder, 103 dadx, 104 LLVMBuildGEP(builder, args->dadx, &idx, 1, "")); 105 106 LLVMBuildStore(builder, 107 dady, 108 LLVMBuildGEP(builder, args->dady, &idx, 1, "")); 109 } 110 111 112 113 static void 114 emit_constant_coef4(struct gallivm_state *gallivm, 115 struct lp_setup_args *args, 116 unsigned slot, 117 LLVMValueRef vert) 118 { 119 store_coef(gallivm, args, slot, vert, args->bld.zero, args->bld.zero); 120 } 121 122 123 124 /** 125 * Setup the fragment input attribute with the front-facing value. 126 * \param frontface is the triangle front facing? 127 */ 128 static void 129 emit_facing_coef(struct gallivm_state *gallivm, 130 struct lp_setup_args *args, 131 unsigned slot ) 132 { 133 LLVMBuilderRef builder = gallivm->builder; 134 LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); 135 LLVMValueRef a0_0 = args->facing; 136 LLVMValueRef a0_0f = LLVMBuildSIToFP(builder, a0_0, float_type, ""); 137 LLVMValueRef a0, face_val; 138 const unsigned char swizzles[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, 139 PIPE_SWIZZLE_0, PIPE_SWIZZLE_0 }; 140 /* Our face val is either 1 or 0 so we do 141 * face = (val * 2) - 1 142 * to make it 1 or -1 143 */ 144 face_val = 145 LLVMBuildFAdd(builder, 146 LLVMBuildFMul(builder, a0_0f, 147 lp_build_const_float(gallivm, 2.0), 148 ""), 149 lp_build_const_float(gallivm, -1.0), 150 "facing"); 151 face_val = lp_build_broadcast_scalar(&args->bld, face_val); 152 a0 = lp_build_swizzle_aos(&args->bld, face_val, swizzles); 153 154 store_coef(gallivm, args, slot, a0, args->bld.zero, args->bld.zero); 155 } 156 157 158 static LLVMValueRef 159 vert_attrib(struct gallivm_state *gallivm, 160 LLVMValueRef vert, 161 int attr, 162 int elem, 163 const char *name) 164 { 165 LLVMBuilderRef b = gallivm->builder; 166 LLVMValueRef idx[2]; 167 idx[0] = lp_build_const_int32(gallivm, attr); 168 idx[1] = lp_build_const_int32(gallivm, elem); 169 return LLVMBuildLoad(b, LLVMBuildGEP(b, vert, idx, 2, ""), name); 170 } 171 172 173 static void 174 lp_twoside(struct gallivm_state *gallivm, 175 struct lp_setup_args *args, 176 const struct lp_setup_variant_key *key, 177 int bcolor_slot, 178 LLVMValueRef attribv[3]) 179 { 180 LLVMBuilderRef b = gallivm->builder; 181 LLVMValueRef a0_back, a1_back, a2_back; 182 LLVMValueRef idx2 = lp_build_const_int32(gallivm, bcolor_slot); 183 184 LLVMValueRef facing = args->facing; 185 LLVMValueRef front_facing = LLVMBuildICmp(b, LLVMIntEQ, facing, 186 lp_build_const_int32(gallivm, 0), ""); /** need i1 for if condition */ 187 188 a0_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx2, 1, ""), "v0a_back"); 189 a1_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx2, 1, ""), "v1a_back"); 190 a2_back = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx2, 1, ""), "v2a_back"); 191 192 /* Possibly swap the front and back attrib values, 193 * 194 * Prefer select to if so we don't have to worry about phis or 195 * allocas. 196 */ 197 attribv[0] = LLVMBuildSelect(b, front_facing, a0_back, attribv[0], ""); 198 attribv[1] = LLVMBuildSelect(b, front_facing, a1_back, attribv[1], ""); 199 attribv[2] = LLVMBuildSelect(b, front_facing, a2_back, attribv[2], ""); 200 201 } 202 203 static void 204 lp_do_offset_tri(struct gallivm_state *gallivm, 205 struct lp_setup_args *args, 206 const struct lp_setup_variant_key *key, 207 LLVMValueRef inv_det, 208 LLVMValueRef dxyz01, 209 LLVMValueRef dxyz20, 210 LLVMValueRef attribv[3]) 211 { 212 LLVMBuilderRef b = gallivm->builder; 213 struct lp_build_context flt_scalar_bld; 214 struct lp_build_context int_scalar_bld; 215 struct lp_build_context *bld = &args->bld; 216 LLVMValueRef zoffset, mult; 217 LLVMValueRef z0_new, z1_new, z2_new; 218 LLVMValueRef dzdxdzdy, dzdx, dzdy, dzxyz20, dyzzx01, dyzzx01_dzxyz20, dzx01_dyz20; 219 LLVMValueRef z0z1, z0z1z2; 220 LLVMValueRef max, max_value, res12; 221 LLVMValueRef shuffles[4]; 222 LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); 223 LLVMValueRef onei = lp_build_const_int32(gallivm, 1); 224 LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); 225 LLVMValueRef twoi = lp_build_const_int32(gallivm, 2); 226 LLVMValueRef threei = lp_build_const_int32(gallivm, 3); 227 228 /* (res12) = cross(e,f).xy */ 229 shuffles[0] = twoi; 230 shuffles[1] = zeroi; 231 shuffles[2] = onei; 232 shuffles[3] = twoi; 233 dzxyz20 = LLVMBuildShuffleVector(b, dxyz20, dxyz20, LLVMConstVector(shuffles, 4), ""); 234 235 shuffles[0] = onei; 236 shuffles[1] = twoi; 237 shuffles[2] = twoi; 238 shuffles[3] = zeroi; 239 dyzzx01 = LLVMBuildShuffleVector(b, dxyz01, dxyz01, LLVMConstVector(shuffles, 4), ""); 240 241 dyzzx01_dzxyz20 = LLVMBuildFMul(b, dzxyz20, dyzzx01, "dyzzx01_dzxyz20"); 242 243 shuffles[0] = twoi; 244 shuffles[1] = threei; 245 shuffles[2] = LLVMGetUndef(shuf_type); 246 shuffles[3] = LLVMGetUndef(shuf_type); 247 dzx01_dyz20 = LLVMBuildShuffleVector(b, dyzzx01_dzxyz20, dyzzx01_dzxyz20, 248 LLVMConstVector(shuffles, 4), ""); 249 250 res12 = LLVMBuildFSub(b, dyzzx01_dzxyz20, dzx01_dyz20, "res12"); 251 252 /* dzdx = fabsf(res1 * inv_det), dydx = fabsf(res2 * inv_det)*/ 253 dzdxdzdy = LLVMBuildFMul(b, res12, inv_det, "dzdxdzdy"); 254 dzdxdzdy = lp_build_abs(bld, dzdxdzdy); 255 256 dzdx = LLVMBuildExtractElement(b, dzdxdzdy, zeroi, ""); 257 dzdy = LLVMBuildExtractElement(b, dzdxdzdy, onei, ""); 258 259 /* mult = MAX2(dzdx, dzdy) * pgon_offset_scale */ 260 max = LLVMBuildFCmp(b, LLVMRealUGT, dzdx, dzdy, ""); 261 max_value = LLVMBuildSelect(b, max, dzdx, dzdy, "max"); 262 263 mult = LLVMBuildFMul(b, max_value, 264 lp_build_const_float(gallivm, key->pgon_offset_scale), ""); 265 266 lp_build_context_init(&flt_scalar_bld, gallivm, lp_type_float_vec(32, 32)); 267 268 if (key->floating_point_depth) { 269 /* 270 * bias = pgon_offset_units * 2^(exponent(max(z0, z1, z2)) - mantissa_bits) + 271 * MAX2(dzdx, dzdy) * pgon_offset_scale 272 * 273 * NOTE: Assumes IEEE float32. 274 */ 275 LLVMValueRef c23_shifted, exp_mask, bias, exp; 276 LLVMValueRef maxz_value, maxz0z1_value; 277 278 lp_build_context_init(&int_scalar_bld, gallivm, lp_type_int_vec(32, 32)); 279 280 c23_shifted = lp_build_const_int32(gallivm, 23 << 23); 281 exp_mask = lp_build_const_int32(gallivm, 0xff << 23); 282 283 maxz0z1_value = lp_build_max(&flt_scalar_bld, 284 LLVMBuildExtractElement(b, attribv[0], twoi, ""), 285 LLVMBuildExtractElement(b, attribv[1], twoi, "")); 286 287 maxz_value = lp_build_max(&flt_scalar_bld, 288 LLVMBuildExtractElement(b, attribv[2], twoi, ""), 289 maxz0z1_value); 290 291 exp = LLVMBuildBitCast(b, maxz_value, int_scalar_bld.vec_type, ""); 292 exp = lp_build_and(&int_scalar_bld, exp, exp_mask); 293 exp = lp_build_sub(&int_scalar_bld, exp, c23_shifted); 294 /* Clamping to zero means mrd will be zero for very small numbers, 295 * but specs do not indicate this should be prevented by clamping 296 * mrd to smallest normal number instead. */ 297 exp = lp_build_max(&int_scalar_bld, exp, int_scalar_bld.zero); 298 exp = LLVMBuildBitCast(b, exp, flt_scalar_bld.vec_type, ""); 299 300 bias = LLVMBuildFMul(b, exp, 301 lp_build_const_float(gallivm, key->pgon_offset_units), 302 "bias"); 303 304 zoffset = LLVMBuildFAdd(b, bias, mult, "zoffset"); 305 } else { 306 /* 307 * bias = pgon_offset_units + MAX2(dzdx, dzdy) * pgon_offset_scale 308 */ 309 zoffset = LLVMBuildFAdd(b, 310 lp_build_const_float(gallivm, key->pgon_offset_units), 311 mult, "zoffset"); 312 } 313 314 if (key->pgon_offset_clamp > 0) { 315 zoffset = lp_build_min(&flt_scalar_bld, 316 lp_build_const_float(gallivm, key->pgon_offset_clamp), 317 zoffset); 318 } 319 else if (key->pgon_offset_clamp < 0) { 320 zoffset = lp_build_max(&flt_scalar_bld, 321 lp_build_const_float(gallivm, key->pgon_offset_clamp), 322 zoffset); 323 } 324 325 /* yuck */ 326 shuffles[0] = twoi; 327 shuffles[1] = lp_build_const_int32(gallivm, 6); 328 shuffles[2] = LLVMGetUndef(shuf_type); 329 shuffles[3] = LLVMGetUndef(shuf_type); 330 z0z1 = LLVMBuildShuffleVector(b, attribv[0], attribv[1], LLVMConstVector(shuffles, 4), ""); 331 shuffles[0] = zeroi; 332 shuffles[1] = onei; 333 shuffles[2] = lp_build_const_int32(gallivm, 6); 334 shuffles[3] = LLVMGetUndef(shuf_type); 335 z0z1z2 = LLVMBuildShuffleVector(b, z0z1, attribv[2], LLVMConstVector(shuffles, 4), ""); 336 zoffset = lp_build_broadcast_scalar(bld, zoffset); 337 338 /* clamp and do offset */ 339 /* 340 * FIXME I suspect the clamp (is that even right to always clamp to fixed 341 * 0.0/1.0?) should really be per fragment? 342 */ 343 z0z1z2 = lp_build_clamp(bld, LLVMBuildFAdd(b, z0z1z2, zoffset, ""), bld->zero, bld->one); 344 345 /* insert into args->a0.z, a1.z, a2.z: 346 */ 347 z0_new = LLVMBuildExtractElement(b, z0z1z2, zeroi, ""); 348 z1_new = LLVMBuildExtractElement(b, z0z1z2, onei, ""); 349 z2_new = LLVMBuildExtractElement(b, z0z1z2, twoi, ""); 350 attribv[0] = LLVMBuildInsertElement(b, attribv[0], z0_new, twoi, ""); 351 attribv[1] = LLVMBuildInsertElement(b, attribv[1], z1_new, twoi, ""); 352 attribv[2] = LLVMBuildInsertElement(b, attribv[2], z2_new, twoi, ""); 353 } 354 355 static void 356 load_attribute(struct gallivm_state *gallivm, 357 struct lp_setup_args *args, 358 const struct lp_setup_variant_key *key, 359 unsigned vert_attr, 360 LLVMValueRef attribv[3]) 361 { 362 LLVMBuilderRef b = gallivm->builder; 363 LLVMValueRef idx = lp_build_const_int32(gallivm, vert_attr); 364 365 /* Load the vertex data 366 */ 367 attribv[0] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v0, &idx, 1, ""), "v0a"); 368 attribv[1] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v1, &idx, 1, ""), "v1a"); 369 attribv[2] = LLVMBuildLoad(b, LLVMBuildGEP(b, args->v2, &idx, 1, ""), "v2a"); 370 371 372 /* Potentially modify it according to twoside, etc: 373 */ 374 if (key->twoside) { 375 if (vert_attr == key->color_slot && key->bcolor_slot >= 0) 376 lp_twoside(gallivm, args, key, key->bcolor_slot, attribv); 377 else if (vert_attr == key->spec_slot && key->bspec_slot >= 0) 378 lp_twoside(gallivm, args, key, key->bspec_slot, attribv); 379 } 380 } 381 382 /* 383 * FIXME: interpolation is always done wrt fb origin (0/0). 384 * However, if some (small) tri is far away from the origin and gradients 385 * are large, this can lead to HUGE errors, since the a0 value calculated 386 * here can get very large (with the actual values inside the triangle way 387 * smaller), leading to complete loss of accuracy. This could be prevented 388 * by using some point inside (or at corner) of the tri as interpolation 389 * origin, or just use barycentric interpolation (which GL suggests and is 390 * what real hw does - you can get the barycentric coordinates from the 391 * edge functions in rasterization in principle (though we skip these 392 * sometimes completely in case of tris covering a block fully, 393 * which obviously wouldn't work)). 394 */ 395 static void 396 emit_coef4( struct gallivm_state *gallivm, 397 struct lp_setup_args *args, 398 unsigned slot, 399 LLVMValueRef a0, 400 LLVMValueRef a1, 401 LLVMValueRef a2) 402 { 403 LLVMBuilderRef b = gallivm->builder; 404 LLVMValueRef attr_0; 405 LLVMValueRef dy20_ooa = args->dy20_ooa; 406 LLVMValueRef dy01_ooa = args->dy01_ooa; 407 LLVMValueRef dx20_ooa = args->dx20_ooa; 408 LLVMValueRef dx01_ooa = args->dx01_ooa; 409 LLVMValueRef x0_center = args->x0_center; 410 LLVMValueRef y0_center = args->y0_center; 411 LLVMValueRef da01 = LLVMBuildFSub(b, a0, a1, "da01"); 412 LLVMValueRef da20 = LLVMBuildFSub(b, a2, a0, "da20"); 413 414 /* Calculate dadx (vec4f) 415 */ 416 LLVMValueRef da01_dy20_ooa = LLVMBuildFMul(b, da01, dy20_ooa, "da01_dy20_ooa"); 417 LLVMValueRef da20_dy01_ooa = LLVMBuildFMul(b, da20, dy01_ooa, "da20_dy01_ooa"); 418 LLVMValueRef dadx = LLVMBuildFSub(b, da01_dy20_ooa, da20_dy01_ooa, "dadx"); 419 420 /* Calculate dady (vec4f) 421 */ 422 LLVMValueRef da01_dx20_ooa = LLVMBuildFMul(b, da01, dx20_ooa, "da01_dx20_ooa"); 423 LLVMValueRef da20_dx01_ooa = LLVMBuildFMul(b, da20, dx01_ooa, "da20_dx01_ooa"); 424 LLVMValueRef dady = LLVMBuildFSub(b, da20_dx01_ooa, da01_dx20_ooa, "dady"); 425 426 /* Calculate a0 - the attribute value at the origin 427 */ 428 LLVMValueRef dadx_x0 = LLVMBuildFMul(b, dadx, x0_center, "dadx_x0"); 429 LLVMValueRef dady_y0 = LLVMBuildFMul(b, dady, y0_center, "dady_y0"); 430 LLVMValueRef attr_v0 = LLVMBuildFAdd(b, dadx_x0, dady_y0, "attr_v0"); 431 attr_0 = LLVMBuildFSub(b, a0, attr_v0, "attr_0"); 432 433 store_coef(gallivm, args, slot, attr_0, dadx, dady); 434 } 435 436 437 static void 438 emit_linear_coef( struct gallivm_state *gallivm, 439 struct lp_setup_args *args, 440 unsigned slot, 441 LLVMValueRef attribv[3]) 442 { 443 /* nothing to do anymore */ 444 emit_coef4(gallivm, 445 args, slot, 446 attribv[0], 447 attribv[1], 448 attribv[2]); 449 } 450 451 452 /** 453 * Compute a0, dadx and dady for a perspective-corrected interpolant, 454 * for a triangle. 455 * We basically multiply the vertex value by 1/w before computing 456 * the plane coefficients (a0, dadx, dady). 457 * Later, when we compute the value at a particular fragment position we'll 458 * divide the interpolated value by the interpolated W at that fragment. 459 */ 460 static void 461 apply_perspective_corr( struct gallivm_state *gallivm, 462 struct lp_setup_args *args, 463 unsigned slot, 464 LLVMValueRef attribv[3]) 465 { 466 LLVMBuilderRef b = gallivm->builder; 467 468 /* premultiply by 1/w (v[0][3] is always 1/w): 469 */ 470 LLVMValueRef v0_oow = lp_build_broadcast_scalar(&args->bld, 471 vert_attrib(gallivm, args->v0, 0, 3, "v0_oow")); 472 LLVMValueRef v1_oow = lp_build_broadcast_scalar(&args->bld, 473 vert_attrib(gallivm, args->v1, 0, 3, "v1_oow")); 474 LLVMValueRef v2_oow = lp_build_broadcast_scalar(&args->bld, 475 vert_attrib(gallivm, args->v2, 0, 3, "v2_oow")); 476 477 attribv[0] = LLVMBuildFMul(b, attribv[0], v0_oow, "v0_oow_v0a"); 478 attribv[1] = LLVMBuildFMul(b, attribv[1], v1_oow, "v1_oow_v1a"); 479 attribv[2] = LLVMBuildFMul(b, attribv[2], v2_oow, "v2_oow_v2a"); 480 } 481 482 483 /** 484 * Applys cylindrical wrapping to vertex attributes if enabled. 485 * Input coordinates must be in [0, 1] range, otherwise results are undefined. 486 * 487 * @param cyl_wrap TGSI_CYLINDRICAL_WRAP_x flags 488 */ 489 static void 490 emit_apply_cyl_wrap(struct gallivm_state *gallivm, 491 struct lp_setup_args *args, 492 uint cyl_wrap, 493 LLVMValueRef attribv[3]) 494 495 { 496 LLVMBuilderRef builder = gallivm->builder; 497 struct lp_type type = args->bld.type; 498 LLVMTypeRef float_vec_type = args->bld.vec_type; 499 LLVMValueRef pos_half; 500 LLVMValueRef neg_half; 501 LLVMValueRef cyl_mask; 502 LLVMValueRef offset; 503 LLVMValueRef delta; 504 LLVMValueRef one; 505 506 if (!cyl_wrap) 507 return; 508 509 /* Constants */ 510 pos_half = lp_build_const_vec(gallivm, type, +0.5f); 511 neg_half = lp_build_const_vec(gallivm, type, -0.5f); 512 cyl_mask = lp_build_const_mask_aos(gallivm, type, cyl_wrap, 4); 513 514 one = lp_build_const_vec(gallivm, type, 1.0f); 515 one = LLVMBuildBitCast(builder, one, lp_build_int_vec_type(gallivm, type), ""); 516 one = LLVMBuildAnd(builder, one, cyl_mask, ""); 517 518 /* Edge v0 -> v1 */ 519 delta = LLVMBuildFSub(builder, attribv[1], attribv[0], ""); 520 521 offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); 522 offset = LLVMBuildAnd(builder, offset, one, ""); 523 offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); 524 attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); 525 526 offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); 527 offset = LLVMBuildAnd(builder, offset, one, ""); 528 offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); 529 attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); 530 531 /* Edge v1 -> v2 */ 532 delta = LLVMBuildFSub(builder, attribv[2], attribv[1], ""); 533 534 offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); 535 offset = LLVMBuildAnd(builder, offset, one, ""); 536 offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); 537 attribv[1] = LLVMBuildFAdd(builder, attribv[1], offset, ""); 538 539 offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); 540 offset = LLVMBuildAnd(builder, offset, one, ""); 541 offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); 542 attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); 543 544 /* Edge v2 -> v0 */ 545 delta = LLVMBuildFSub(builder, attribv[0], attribv[2], ""); 546 547 offset = lp_build_compare(gallivm, type, PIPE_FUNC_GREATER, delta, pos_half); 548 offset = LLVMBuildAnd(builder, offset, one, ""); 549 offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); 550 attribv[2] = LLVMBuildFAdd(builder, attribv[2], offset, ""); 551 552 offset = lp_build_compare(gallivm, type, PIPE_FUNC_LESS, delta, neg_half); 553 offset = LLVMBuildAnd(builder, offset, one, ""); 554 offset = LLVMBuildBitCast(builder, offset, float_vec_type, ""); 555 attribv[0] = LLVMBuildFAdd(builder, attribv[0], offset, ""); 556 } 557 558 559 /** 560 * Compute the inputs-> dadx, dady, a0 values. 561 */ 562 static void 563 emit_tri_coef( struct gallivm_state *gallivm, 564 const struct lp_setup_variant_key *key, 565 struct lp_setup_args *args) 566 { 567 unsigned slot; 568 569 LLVMValueRef attribs[3]; 570 571 /* setup interpolation for all the remaining attributes: 572 */ 573 for (slot = 0; slot < key->num_inputs; slot++) { 574 switch (key->inputs[slot].interp) { 575 case LP_INTERP_CONSTANT: 576 load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); 577 if (key->flatshade_first) { 578 emit_constant_coef4(gallivm, args, slot+1, attribs[0]); 579 } 580 else { 581 emit_constant_coef4(gallivm, args, slot+1, attribs[2]); 582 } 583 break; 584 585 case LP_INTERP_LINEAR: 586 load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); 587 emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); 588 emit_linear_coef(gallivm, args, slot+1, attribs); 589 break; 590 591 case LP_INTERP_PERSPECTIVE: 592 load_attribute(gallivm, args, key, key->inputs[slot].src_index, attribs); 593 emit_apply_cyl_wrap(gallivm, args, key->inputs[slot].cyl_wrap, attribs); 594 apply_perspective_corr(gallivm, args, slot+1, attribs); 595 emit_linear_coef(gallivm, args, slot+1, attribs); 596 break; 597 598 case LP_INTERP_POSITION: 599 /* 600 * The generated pixel interpolators will pick up the coeffs from 601 * slot 0. 602 */ 603 break; 604 605 case LP_INTERP_FACING: 606 emit_facing_coef(gallivm, args, slot+1); 607 break; 608 609 default: 610 assert(0); 611 } 612 } 613 } 614 615 616 /* XXX: generic code: 617 */ 618 static void 619 set_noalias(LLVMBuilderRef builder, 620 LLVMValueRef function, 621 const LLVMTypeRef *arg_types, 622 int nr_args) 623 { 624 int i; 625 for(i = 0; i < nr_args; ++i) 626 if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) 627 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS); 628 } 629 630 static void 631 init_args(struct gallivm_state *gallivm, 632 const struct lp_setup_variant_key *key, 633 struct lp_setup_args *args) 634 { 635 LLVMBuilderRef b = gallivm->builder; 636 LLVMTypeRef shuf_type = LLVMInt32TypeInContext(gallivm->context); 637 LLVMValueRef onef = lp_build_const_float(gallivm, 1.0); 638 LLVMValueRef onei = lp_build_const_int32(gallivm, 1); 639 LLVMValueRef zeroi = lp_build_const_int32(gallivm, 0); 640 LLVMValueRef pixel_center, xy0_center, dxy01, dxy20, dyx20; 641 LLVMValueRef e, f, ef, ooa; 642 LLVMValueRef shuffles[4], shuf10; 643 LLVMValueRef attr_pos[3]; 644 struct lp_type typef4 = lp_type_float_vec(32, 128); 645 struct lp_build_context bld; 646 647 lp_build_context_init(&bld, gallivm, typef4); 648 args->bld = bld; 649 650 /* The internal position input is in slot zero: 651 */ 652 load_attribute(gallivm, args, key, 0, attr_pos); 653 654 pixel_center = lp_build_const_vec(gallivm, typef4, 655 key->pixel_center_half ? 0.5 : 0.0); 656 657 /* 658 * xy are first two elems in v0a/v1a/v2a but just use vec4 arit 659 * also offset_tri uses actually xyz in them 660 */ 661 xy0_center = LLVMBuildFSub(b, attr_pos[0], pixel_center, "xy0_center" ); 662 663 dxy01 = LLVMBuildFSub(b, attr_pos[0], attr_pos[1], "dxy01"); 664 dxy20 = LLVMBuildFSub(b, attr_pos[2], attr_pos[0], "dxy20"); 665 666 shuffles[0] = onei; 667 shuffles[1] = zeroi; 668 shuffles[2] = LLVMGetUndef(shuf_type); 669 shuffles[3] = LLVMGetUndef(shuf_type); 670 shuf10 = LLVMConstVector(shuffles, 4); 671 672 dyx20 = LLVMBuildShuffleVector(b, dxy20, dxy20, shuf10, ""); 673 674 ef = LLVMBuildFMul(b, dxy01, dyx20, "ef"); 675 e = LLVMBuildExtractElement(b, ef, zeroi, ""); 676 f = LLVMBuildExtractElement(b, ef, onei, ""); 677 678 ooa = LLVMBuildFDiv(b, onef, LLVMBuildFSub(b, e, f, ""), "ooa"); 679 680 ooa = lp_build_broadcast_scalar(&bld, ooa); 681 682 /* tri offset calc shares a lot of arithmetic, do it here */ 683 if (key->pgon_offset_scale != 0.0f || key->pgon_offset_units != 0.0f) { 684 lp_do_offset_tri(gallivm, args, key, ooa, dxy01, dxy20, attr_pos); 685 } 686 687 dxy20 = LLVMBuildFMul(b, dxy20, ooa, ""); 688 dxy01 = LLVMBuildFMul(b, dxy01, ooa, ""); 689 690 args->dy20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, onei); 691 args->dy01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, onei); 692 693 args->dx20_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy20, zeroi); 694 args->dx01_ooa = lp_build_extract_broadcast(gallivm, typef4, typef4, dxy01, zeroi); 695 696 args->x0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, zeroi); 697 args->y0_center = lp_build_extract_broadcast(gallivm, typef4, typef4, xy0_center, onei); 698 699 emit_linear_coef(gallivm, args, 0, attr_pos); 700 } 701 702 /** 703 * Generate the runtime callable function for the coefficient calculation. 704 * 705 */ 706 static struct lp_setup_variant * 707 generate_setup_variant(struct lp_setup_variant_key *key, 708 struct llvmpipe_context *lp) 709 { 710 struct lp_setup_variant *variant = NULL; 711 struct gallivm_state *gallivm; 712 struct lp_setup_args args; 713 char func_name[64]; 714 LLVMTypeRef vec4f_type; 715 LLVMTypeRef func_type; 716 LLVMTypeRef arg_types[7]; 717 LLVMBasicBlockRef block; 718 LLVMBuilderRef builder; 719 int64_t t0 = 0, t1; 720 721 if (0) 722 goto fail; 723 724 variant = CALLOC_STRUCT(lp_setup_variant); 725 if (!variant) 726 goto fail; 727 728 variant->no = setup_no++; 729 730 util_snprintf(func_name, sizeof(func_name), "setup_variant_%u", 731 variant->no); 732 733 variant->gallivm = gallivm = gallivm_create(func_name, lp->context); 734 if (!variant->gallivm) { 735 goto fail; 736 } 737 738 builder = gallivm->builder; 739 740 if (LP_DEBUG & DEBUG_COUNTERS) { 741 t0 = os_time_get(); 742 } 743 744 memcpy(&variant->key, key, key->size); 745 variant->list_item_global.base = variant; 746 747 /* Currently always deal with full 4-wide vertex attributes from 748 * the vertices. 749 */ 750 751 vec4f_type = LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4); 752 753 arg_types[0] = LLVMPointerType(vec4f_type, 0); /* v0 */ 754 arg_types[1] = LLVMPointerType(vec4f_type, 0); /* v1 */ 755 arg_types[2] = LLVMPointerType(vec4f_type, 0); /* v2 */ 756 arg_types[3] = LLVMInt32TypeInContext(gallivm->context); /* facing */ 757 arg_types[4] = LLVMPointerType(vec4f_type, 0); /* a0, aligned */ 758 arg_types[5] = LLVMPointerType(vec4f_type, 0); /* dadx, aligned */ 759 arg_types[6] = LLVMPointerType(vec4f_type, 0); /* dady, aligned */ 760 761 func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context), 762 arg_types, ARRAY_SIZE(arg_types), 0); 763 764 variant->function = LLVMAddFunction(gallivm->module, func_name, func_type); 765 if (!variant->function) 766 goto fail; 767 768 LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); 769 770 args.v0 = LLVMGetParam(variant->function, 0); 771 args.v1 = LLVMGetParam(variant->function, 1); 772 args.v2 = LLVMGetParam(variant->function, 2); 773 args.facing = LLVMGetParam(variant->function, 3); 774 args.a0 = LLVMGetParam(variant->function, 4); 775 args.dadx = LLVMGetParam(variant->function, 5); 776 args.dady = LLVMGetParam(variant->function, 6); 777 778 lp_build_name(args.v0, "in_v0"); 779 lp_build_name(args.v1, "in_v1"); 780 lp_build_name(args.v2, "in_v2"); 781 lp_build_name(args.facing, "in_facing"); 782 lp_build_name(args.a0, "out_a0"); 783 lp_build_name(args.dadx, "out_dadx"); 784 lp_build_name(args.dady, "out_dady"); 785 786 /* 787 * Function body 788 */ 789 block = LLVMAppendBasicBlockInContext(gallivm->context, 790 variant->function, "entry"); 791 LLVMPositionBuilderAtEnd(builder, block); 792 793 set_noalias(builder, variant->function, arg_types, ARRAY_SIZE(arg_types)); 794 init_args(gallivm, &variant->key, &args); 795 emit_tri_coef(gallivm, &variant->key, &args); 796 797 LLVMBuildRetVoid(builder); 798 799 gallivm_verify_function(gallivm, variant->function); 800 801 gallivm_compile_module(gallivm); 802 803 variant->jit_function = (lp_jit_setup_triangle) 804 gallivm_jit_function(gallivm, variant->function); 805 if (!variant->jit_function) 806 goto fail; 807 808 gallivm_free_ir(variant->gallivm); 809 810 /* 811 * Update timing information: 812 */ 813 if (LP_DEBUG & DEBUG_COUNTERS) { 814 t1 = os_time_get(); 815 LP_COUNT_ADD(llvm_compile_time, t1 - t0); 816 LP_COUNT_ADD(nr_llvm_compiles, 1); 817 } 818 819 return variant; 820 821 fail: 822 if (variant) { 823 if (variant->gallivm) { 824 gallivm_destroy(variant->gallivm); 825 } 826 FREE(variant); 827 } 828 829 return NULL; 830 } 831 832 833 834 static void 835 lp_make_setup_variant_key(struct llvmpipe_context *lp, 836 struct lp_setup_variant_key *key) 837 { 838 struct lp_fragment_shader *fs = lp->fs; 839 unsigned i; 840 841 assert(sizeof key->inputs[0] == sizeof(uint)); 842 843 key->num_inputs = fs->info.base.num_inputs; 844 key->flatshade_first = lp->rasterizer->flatshade_first; 845 key->pixel_center_half = lp->rasterizer->half_pixel_center; 846 key->twoside = lp->rasterizer->light_twoside; 847 key->size = Offset(struct lp_setup_variant_key, 848 inputs[key->num_inputs]); 849 850 key->color_slot = lp->color_slot[0]; 851 key->bcolor_slot = lp->bcolor_slot[0]; 852 key->spec_slot = lp->color_slot[1]; 853 key->bspec_slot = lp->bcolor_slot[1]; 854 855 /* 856 * If depth is floating point, depth bias is calculated with respect 857 * to the primitive's maximum Z value. Retain the original depth bias 858 * value until that stage. 859 */ 860 key->floating_point_depth = lp->floating_point_depth; 861 862 if (key->floating_point_depth) { 863 key->pgon_offset_units = (float) lp->rasterizer->offset_units; 864 } else { 865 key->pgon_offset_units = 866 (float) (lp->rasterizer->offset_units * lp->mrd); 867 } 868 869 key->pgon_offset_scale = lp->rasterizer->offset_scale; 870 key->pgon_offset_clamp = lp->rasterizer->offset_clamp; 871 key->pad = 0; 872 memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]); 873 for (i = 0; i < key->num_inputs; i++) { 874 if (key->inputs[i].interp == LP_INTERP_COLOR) { 875 if (lp->rasterizer->flatshade) 876 key->inputs[i].interp = LP_INTERP_CONSTANT; 877 else 878 key->inputs[i].interp = LP_INTERP_PERSPECTIVE; 879 } 880 } 881 882 } 883 884 885 static void 886 remove_setup_variant(struct llvmpipe_context *lp, 887 struct lp_setup_variant *variant) 888 { 889 if (gallivm_debug & GALLIVM_DEBUG_IR) { 890 debug_printf("llvmpipe: del setup_variant #%u total %u\n", 891 variant->no, lp->nr_setup_variants); 892 } 893 894 if (variant->gallivm) { 895 gallivm_destroy(variant->gallivm); 896 } 897 898 remove_from_list(&variant->list_item_global); 899 lp->nr_setup_variants--; 900 FREE(variant); 901 } 902 903 904 905 /* When the number of setup variants exceeds a threshold, cull a 906 * fraction (currently a quarter) of them. 907 */ 908 static void 909 cull_setup_variants(struct llvmpipe_context *lp) 910 { 911 struct pipe_context *pipe = &lp->pipe; 912 int i; 913 914 /* 915 * XXX: we need to flush the context until we have some sort of reference 916 * counting in fragment shaders as they may still be binned 917 * Flushing alone might not be sufficient we need to wait on it too. 918 */ 919 llvmpipe_finish(pipe, __FUNCTION__); 920 921 for (i = 0; i < LP_MAX_SETUP_VARIANTS / 4; i++) { 922 struct lp_setup_variant_list_item *item; 923 if (is_empty_list(&lp->setup_variants_list)) { 924 break; 925 } 926 item = last_elem(&lp->setup_variants_list); 927 assert(item); 928 assert(item->base); 929 remove_setup_variant(lp, item->base); 930 } 931 } 932 933 934 /** 935 * Update fragment/vertex shader linkage state. This is called just 936 * prior to drawing something when some fragment-related state has 937 * changed. 938 */ 939 void 940 llvmpipe_update_setup(struct llvmpipe_context *lp) 941 { 942 struct lp_setup_variant_key *key = &lp->setup_variant.key; 943 struct lp_setup_variant *variant = NULL; 944 struct lp_setup_variant_list_item *li; 945 946 lp_make_setup_variant_key(lp, key); 947 948 foreach(li, &lp->setup_variants_list) { 949 if(li->base->key.size == key->size && 950 memcmp(&li->base->key, key, key->size) == 0) { 951 variant = li->base; 952 break; 953 } 954 } 955 956 if (variant) { 957 move_to_head(&lp->setup_variants_list, &variant->list_item_global); 958 } 959 else { 960 if (lp->nr_setup_variants >= LP_MAX_SETUP_VARIANTS) { 961 cull_setup_variants(lp); 962 } 963 964 variant = generate_setup_variant(key, lp); 965 if (variant) { 966 insert_at_head(&lp->setup_variants_list, &variant->list_item_global); 967 lp->nr_setup_variants++; 968 } 969 } 970 971 lp_setup_set_setup_variant(lp->setup, variant); 972 } 973 974 void 975 lp_delete_setup_variants(struct llvmpipe_context *lp) 976 { 977 struct lp_setup_variant_list_item *li; 978 li = first_elem(&lp->setup_variants_list); 979 while(!at_end(&lp->setup_variants_list, li)) { 980 struct lp_setup_variant_list_item *next = next_elem(li); 981 remove_setup_variant(lp, li->base); 982 li = next; 983 } 984 } 985 986 void 987 lp_dump_setup_coef(const struct lp_setup_variant_key *key, 988 const float (*sa0)[4], 989 const float (*sdadx)[4], 990 const float (*sdady)[4]) 991 { 992 int i, slot; 993 994 for (i = 0; i < TGSI_NUM_CHANNELS; i++) { 995 float a0 = sa0 [0][i]; 996 float dadx = sdadx[0][i]; 997 float dady = sdady[0][i]; 998 999 debug_printf("POS.%c: a0 = %f, dadx = %f, dady = %f\n", 1000 "xyzw"[i], a0, dadx, dady); 1001 } 1002 1003 for (slot = 0; slot < key->num_inputs; slot++) { 1004 unsigned usage_mask = key->inputs[slot].usage_mask; 1005 for (i = 0; i < TGSI_NUM_CHANNELS; i++) { 1006 if (usage_mask & (1 << i)) { 1007 float a0 = sa0 [1 + slot][i]; 1008 float dadx = sdadx[1 + slot][i]; 1009 float dady = sdady[1 + slot][i]; 1010 1011 debug_printf("IN[%u].%c: a0 = %f, dadx = %f, dady = %f\n", 1012 slot, "xyzw"[i], a0, dadx, dady); 1013 } 1014 } 1015 } 1016 } 1017