1 /**************************************************************************** 2 * Copyright (C) 2015 Intel Corporation. All Rights Reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 ***************************************************************************/ 23 24 // llvm redefines DEBUG 25 #pragma push_macro("DEBUG") 26 #undef DEBUG 27 #include "JitManager.h" 28 #include "llvm-c/Core.h" 29 #include "llvm/Support/CBindingWrapping.h" 30 #pragma pop_macro("DEBUG") 31 32 #include "state.h" 33 #include "state_llvm.h" 34 #include "builder.h" 35 36 #include "tgsi/tgsi_strings.h" 37 #include "util/u_format.h" 38 #include "gallivm/lp_bld_init.h" 39 #include "gallivm/lp_bld_flow.h" 40 #include "gallivm/lp_bld_struct.h" 41 #include "gallivm/lp_bld_tgsi.h" 42 43 #include "swr_context.h" 44 #include "swr_context_llvm.h" 45 #include "swr_resource.h" 46 #include "swr_state.h" 47 #include "swr_screen.h" 48 49 using namespace SwrJit; 50 51 static unsigned 52 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info); 53 54 bool operator==(const swr_jit_fs_key &lhs, const swr_jit_fs_key &rhs) 55 { 56 return !memcmp(&lhs, &rhs, sizeof(lhs)); 57 } 58 59 bool operator==(const swr_jit_vs_key &lhs, const swr_jit_vs_key &rhs) 60 { 61 return !memcmp(&lhs, &rhs, sizeof(lhs)); 62 } 63 64 static void 65 swr_generate_sampler_key(const struct lp_tgsi_info &info, 66 struct swr_context *ctx, 67 unsigned shader_type, 68 struct swr_jit_sampler_key &key) 69 { 70 key.nr_samplers = info.base.file_max[TGSI_FILE_SAMPLER] + 1; 71 72 for (unsigned i = 0; i < key.nr_samplers; i++) { 73 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 74 lp_sampler_static_sampler_state( 75 &key.sampler[i].sampler_state, 76 ctx->samplers[shader_type][i]); 77 } 78 } 79 80 /* 81 * XXX If TGSI_FILE_SAMPLER_VIEW exists assume all texture opcodes 82 * are dx10-style? Can't really have mixed opcodes, at least not 83 * if we want to skip the holes here (without rescanning tgsi). 84 */ 85 if (info.base.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { 86 key.nr_sampler_views = 87 info.base.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 88 for (unsigned i = 0; i < key.nr_sampler_views; i++) { 89 if (info.base.file_mask[TGSI_FILE_SAMPLER_VIEW] & (1 << i)) { 90 const struct pipe_sampler_view *view = 91 ctx->sampler_views[shader_type][i]; 92 lp_sampler_static_texture_state( 93 &key.sampler[i].texture_state, view); 94 if (view) { 95 struct swr_resource *swr_res = swr_resource(view->texture); 96 const struct util_format_description *desc = 97 util_format_description(view->format); 98 if (swr_res->has_depth && swr_res->has_stencil && 99 !util_format_has_depth(desc)) 100 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; 101 } 102 } 103 } 104 } else { 105 key.nr_sampler_views = key.nr_samplers; 106 for (unsigned i = 0; i < key.nr_sampler_views; i++) { 107 if (info.base.file_mask[TGSI_FILE_SAMPLER] & (1 << i)) { 108 const struct pipe_sampler_view *view = 109 ctx->sampler_views[shader_type][i]; 110 lp_sampler_static_texture_state( 111 &key.sampler[i].texture_state, view); 112 if (view) { 113 struct swr_resource *swr_res = swr_resource(view->texture); 114 const struct util_format_description *desc = 115 util_format_description(view->format); 116 if (swr_res->has_depth && swr_res->has_stencil && 117 !util_format_has_depth(desc)) 118 key.sampler[i].texture_state.format = PIPE_FORMAT_S8_UINT; 119 } 120 } 121 } 122 } 123 } 124 125 void 126 swr_generate_fs_key(struct swr_jit_fs_key &key, 127 struct swr_context *ctx, 128 swr_fragment_shader *swr_fs) 129 { 130 memset(&key, 0, sizeof(key)); 131 132 key.nr_cbufs = ctx->framebuffer.nr_cbufs; 133 key.light_twoside = ctx->rasterizer->light_twoside; 134 key.sprite_coord_enable = ctx->rasterizer->sprite_coord_enable; 135 memcpy(&key.vs_output_semantic_name, 136 &ctx->vs->info.base.output_semantic_name, 137 sizeof(key.vs_output_semantic_name)); 138 memcpy(&key.vs_output_semantic_idx, 139 &ctx->vs->info.base.output_semantic_index, 140 sizeof(key.vs_output_semantic_idx)); 141 142 swr_generate_sampler_key(swr_fs->info, ctx, PIPE_SHADER_FRAGMENT, key); 143 } 144 145 void 146 swr_generate_vs_key(struct swr_jit_vs_key &key, 147 struct swr_context *ctx, 148 swr_vertex_shader *swr_vs) 149 { 150 memset(&key, 0, sizeof(key)); 151 152 key.clip_plane_mask = 153 swr_vs->info.base.clipdist_writemask ? 154 swr_vs->info.base.clipdist_writemask & ctx->rasterizer->clip_plane_enable : 155 ctx->rasterizer->clip_plane_enable; 156 157 swr_generate_sampler_key(swr_vs->info, ctx, PIPE_SHADER_VERTEX, key); 158 } 159 160 struct BuilderSWR : public Builder { 161 BuilderSWR(JitManager *pJitMgr, const char *pName) 162 : Builder(pJitMgr) 163 { 164 pJitMgr->SetupNewModule(); 165 gallivm = gallivm_create(pName, wrap(&JM()->mContext)); 166 pJitMgr->mpCurrentModule = unwrap(gallivm->module); 167 } 168 169 ~BuilderSWR() { 170 gallivm_free_ir(gallivm); 171 } 172 173 struct gallivm_state *gallivm; 174 PFN_VERTEX_FUNC CompileVS(struct swr_context *ctx, swr_jit_vs_key &key); 175 PFN_PIXEL_KERNEL CompileFS(struct swr_context *ctx, swr_jit_fs_key &key); 176 }; 177 178 PFN_VERTEX_FUNC 179 BuilderSWR::CompileVS(struct swr_context *ctx, swr_jit_vs_key &key) 180 { 181 struct swr_vertex_shader *swr_vs = ctx->vs; 182 183 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 184 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 185 186 memset(outputs, 0, sizeof(outputs)); 187 188 AttrBuilder attrBuilder; 189 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 190 AttributeSet attrSet = AttributeSet::get( 191 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 192 193 std::vector<Type *> vsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 194 PointerType::get(Gen_SWR_VS_CONTEXT(JM()), 0)}; 195 FunctionType *vsFuncType = 196 FunctionType::get(Type::getVoidTy(JM()->mContext), vsArgs, false); 197 198 // create new vertex shader function 199 auto pFunction = Function::Create(vsFuncType, 200 GlobalValue::ExternalLinkage, 201 "VS", 202 JM()->mpCurrentModule); 203 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 204 205 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 206 IRB()->SetInsertPoint(block); 207 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 208 209 auto argitr = pFunction->arg_begin(); 210 Value *hPrivateData = &*argitr++; 211 hPrivateData->setName("hPrivateData"); 212 Value *pVsCtx = &*argitr++; 213 pVsCtx->setName("vsCtx"); 214 215 Value *consts_ptr = GEP(hPrivateData, {C(0), C(swr_draw_context_constantVS)}); 216 217 consts_ptr->setName("vs_constants"); 218 Value *const_sizes_ptr = 219 GEP(hPrivateData, {0, swr_draw_context_num_constantsVS}); 220 const_sizes_ptr->setName("num_vs_constants"); 221 222 Value *vtxInput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVin}); 223 224 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 225 const unsigned mask = swr_vs->info.base.input_usage_mask[attrib]; 226 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 227 if (mask & (1 << channel)) { 228 inputs[attrib][channel] = 229 wrap(LOAD(vtxInput, {0, 0, attrib, channel})); 230 } 231 } 232 } 233 234 struct lp_build_sampler_soa *sampler = 235 swr_sampler_soa_create(key.sampler, PIPE_SHADER_VERTEX); 236 237 struct lp_bld_tgsi_system_values system_values; 238 memset(&system_values, 0, sizeof(system_values)); 239 system_values.instance_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_InstanceID})); 240 system_values.vertex_id = wrap(LOAD(pVsCtx, {0, SWR_VS_CONTEXT_VertexID})); 241 242 lp_build_tgsi_soa(gallivm, 243 swr_vs->pipe.tokens, 244 lp_type_float_vec(32, 32 * 8), 245 NULL, // mask 246 wrap(consts_ptr), 247 wrap(const_sizes_ptr), 248 &system_values, 249 inputs, 250 outputs, 251 wrap(hPrivateData), // (sampler context) 252 NULL, // thread data 253 sampler, // sampler 254 &swr_vs->info.base, 255 NULL); // geometry shader face 256 257 sampler->destroy(sampler); 258 259 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 260 261 Value *vtxOutput = LOAD(pVsCtx, {0, SWR_VS_CONTEXT_pVout}); 262 263 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 264 for (uint32_t attrib = 0; attrib < PIPE_MAX_SHADER_OUTPUTS; attrib++) { 265 if (!outputs[attrib][channel]) 266 continue; 267 268 Value *val = LOAD(unwrap(outputs[attrib][channel])); 269 270 uint32_t outSlot = attrib; 271 if (swr_vs->info.base.output_semantic_name[attrib] == TGSI_SEMANTIC_PSIZE) 272 outSlot = VERTEX_POINT_SIZE_SLOT; 273 STORE(val, vtxOutput, {0, 0, outSlot, channel}); 274 } 275 } 276 277 if (ctx->rasterizer->clip_plane_enable || 278 swr_vs->info.base.culldist_writemask) { 279 unsigned clip_mask = ctx->rasterizer->clip_plane_enable; 280 281 unsigned cv = 0; 282 if (swr_vs->info.base.writes_clipvertex) { 283 cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPVERTEX, 0, 284 &swr_vs->info.base); 285 } else { 286 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 287 if (swr_vs->info.base.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && 288 swr_vs->info.base.output_semantic_index[i] == 0) { 289 cv = i; 290 break; 291 } 292 } 293 } 294 LLVMValueRef cx = LLVMBuildLoad(gallivm->builder, outputs[cv][0], ""); 295 LLVMValueRef cy = LLVMBuildLoad(gallivm->builder, outputs[cv][1], ""); 296 LLVMValueRef cz = LLVMBuildLoad(gallivm->builder, outputs[cv][2], ""); 297 LLVMValueRef cw = LLVMBuildLoad(gallivm->builder, outputs[cv][3], ""); 298 299 for (unsigned val = 0; val < PIPE_MAX_CLIP_PLANES; val++) { 300 // clip distance overrides user clip planes 301 if ((swr_vs->info.base.clipdist_writemask & clip_mask & (1 << val)) || 302 ((swr_vs->info.base.culldist_writemask << swr_vs->info.base.num_written_clipdistance) & (1 << val))) { 303 unsigned cv = 1 + locate_linkage(TGSI_SEMANTIC_CLIPDIST, val < 4 ? 0 : 1, 304 &swr_vs->info.base); 305 if (val < 4) { 306 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val], ""); 307 STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val}); 308 } else { 309 LLVMValueRef dist = LLVMBuildLoad(gallivm->builder, outputs[cv][val - 4], ""); 310 STORE(unwrap(dist), vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4}); 311 } 312 continue; 313 } 314 315 if (!(clip_mask & (1 << val))) 316 continue; 317 318 Value *px = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 0})); 319 Value *py = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 1})); 320 Value *pz = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 2})); 321 Value *pw = LOAD(GEP(hPrivateData, {0, swr_draw_context_userClipPlanes, val, 3})); 322 Value *dist = FADD(FMUL(unwrap(cx), VBROADCAST(px)), 323 FADD(FMUL(unwrap(cy), VBROADCAST(py)), 324 FADD(FMUL(unwrap(cz), VBROADCAST(pz)), 325 FMUL(unwrap(cw), VBROADCAST(pw))))); 326 327 if (val < 4) 328 STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_LO_SLOT, val}); 329 else 330 STORE(dist, vtxOutput, {0, 0, VERTEX_CLIPCULL_DIST_HI_SLOT, val - 4}); 331 } 332 } 333 334 RET_VOID(); 335 336 gallivm_verify_function(gallivm, wrap(pFunction)); 337 gallivm_compile_module(gallivm); 338 339 // lp_debug_dump_value(func); 340 341 PFN_VERTEX_FUNC pFunc = 342 (PFN_VERTEX_FUNC)gallivm_jit_function(gallivm, wrap(pFunction)); 343 344 debug_printf("vert shader %p\n", pFunc); 345 assert(pFunc && "Error: VertShader = NULL"); 346 347 JM()->mIsModuleFinalized = true; 348 349 return pFunc; 350 } 351 352 PFN_VERTEX_FUNC 353 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key) 354 { 355 BuilderSWR builder( 356 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 357 "VS"); 358 PFN_VERTEX_FUNC func = builder.CompileVS(ctx, key); 359 360 ctx->vs->map.insert(std::make_pair(key, make_unique<VariantVS>(builder.gallivm, func))); 361 return func; 362 } 363 364 static unsigned 365 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info) 366 { 367 for (int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { 368 if ((info->output_semantic_name[i] == name) 369 && (info->output_semantic_index[i] == index)) { 370 return i - 1; // position is not part of the linkage 371 } 372 } 373 374 return 0xFFFFFFFF; 375 } 376 377 PFN_PIXEL_KERNEL 378 BuilderSWR::CompileFS(struct swr_context *ctx, swr_jit_fs_key &key) 379 { 380 struct swr_fragment_shader *swr_fs = ctx->fs; 381 382 LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; 383 LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; 384 385 memset(inputs, 0, sizeof(inputs)); 386 memset(outputs, 0, sizeof(outputs)); 387 388 struct lp_build_sampler_soa *sampler = NULL; 389 390 AttrBuilder attrBuilder; 391 attrBuilder.addStackAlignmentAttr(JM()->mVWidth * sizeof(float)); 392 AttributeSet attrSet = AttributeSet::get( 393 JM()->mContext, AttributeSet::FunctionIndex, attrBuilder); 394 395 std::vector<Type *> fsArgs{PointerType::get(Gen_swr_draw_context(JM()), 0), 396 PointerType::get(Gen_SWR_PS_CONTEXT(JM()), 0)}; 397 FunctionType *funcType = 398 FunctionType::get(Type::getVoidTy(JM()->mContext), fsArgs, false); 399 400 auto pFunction = Function::Create(funcType, 401 GlobalValue::ExternalLinkage, 402 "FS", 403 JM()->mpCurrentModule); 404 pFunction->addAttributes(AttributeSet::FunctionIndex, attrSet); 405 406 BasicBlock *block = BasicBlock::Create(JM()->mContext, "entry", pFunction); 407 IRB()->SetInsertPoint(block); 408 LLVMPositionBuilderAtEnd(gallivm->builder, wrap(block)); 409 410 auto args = pFunction->arg_begin(); 411 Value *hPrivateData = &*args++; 412 hPrivateData->setName("hPrivateData"); 413 Value *pPS = &*args++; 414 pPS->setName("psCtx"); 415 416 Value *consts_ptr = GEP(hPrivateData, {0, swr_draw_context_constantFS}); 417 consts_ptr->setName("fs_constants"); 418 Value *const_sizes_ptr = 419 GEP(hPrivateData, {0, swr_draw_context_num_constantsFS}); 420 const_sizes_ptr->setName("num_fs_constants"); 421 422 // load *pAttribs, *pPerspAttribs 423 Value *pRawAttribs = LOAD(pPS, {0, SWR_PS_CONTEXT_pAttribs}, "pRawAttribs"); 424 Value *pPerspAttribs = 425 LOAD(pPS, {0, SWR_PS_CONTEXT_pPerspAttribs}, "pPerspAttribs"); 426 427 swr_fs->constantMask = 0; 428 swr_fs->flatConstantMask = 0; 429 swr_fs->pointSpriteMask = 0; 430 431 for (int attrib = 0; attrib < PIPE_MAX_SHADER_INPUTS; attrib++) { 432 const unsigned mask = swr_fs->info.base.input_usage_mask[attrib]; 433 const unsigned interpMode = swr_fs->info.base.input_interpolate[attrib]; 434 const unsigned interpLoc = swr_fs->info.base.input_interpolate_loc[attrib]; 435 436 if (!mask) 437 continue; 438 439 // load i,j 440 Value *vi = nullptr, *vj = nullptr; 441 switch (interpLoc) { 442 case TGSI_INTERPOLATE_LOC_CENTER: 443 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_center}, "i"); 444 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_center}, "j"); 445 break; 446 case TGSI_INTERPOLATE_LOC_CENTROID: 447 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_centroid}, "i"); 448 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_centroid}, "j"); 449 break; 450 case TGSI_INTERPOLATE_LOC_SAMPLE: 451 vi = LOAD(pPS, {0, SWR_PS_CONTEXT_vI, PixelPositions_sample}, "i"); 452 vj = LOAD(pPS, {0, SWR_PS_CONTEXT_vJ, PixelPositions_sample}, "j"); 453 break; 454 } 455 456 // load/compute w 457 Value *vw = nullptr, *pAttribs; 458 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE || 459 interpMode == TGSI_INTERPOLATE_COLOR) { 460 pAttribs = pPerspAttribs; 461 switch (interpLoc) { 462 case TGSI_INTERPOLATE_LOC_CENTER: 463 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center})); 464 break; 465 case TGSI_INTERPOLATE_LOC_CENTROID: 466 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_centroid})); 467 break; 468 case TGSI_INTERPOLATE_LOC_SAMPLE: 469 vw = VRCP(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_sample})); 470 break; 471 } 472 } else { 473 pAttribs = pRawAttribs; 474 vw = VIMMED1(1.f); 475 } 476 477 vw->setName("w"); 478 479 ubyte semantic_name = swr_fs->info.base.input_semantic_name[attrib]; 480 ubyte semantic_idx = swr_fs->info.base.input_semantic_index[attrib]; 481 482 if (semantic_name == TGSI_SEMANTIC_FACE) { 483 Value *ff = 484 UI_TO_FP(LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), mFP32Ty); 485 ff = FSUB(FMUL(ff, C(2.0f)), C(1.0f)); 486 ff = VECTOR_SPLAT(JM()->mVWidth, ff, "vFrontFace"); 487 488 inputs[attrib][0] = wrap(ff); 489 inputs[attrib][1] = wrap(VIMMED1(0.0f)); 490 inputs[attrib][2] = wrap(VIMMED1(0.0f)); 491 inputs[attrib][3] = wrap(VIMMED1(1.0f)); 492 continue; 493 } else if (semantic_name == TGSI_SEMANTIC_POSITION) { // gl_FragCoord 494 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] == 495 TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER) { 496 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_center}, "vX")); 497 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_center}, "vY")); 498 } else { 499 inputs[attrib][0] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vX, PixelPositions_UL}, "vX")); 500 inputs[attrib][1] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vY, PixelPositions_UL}, "vY")); 501 } 502 inputs[attrib][2] = wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vZ}, "vZ")); 503 inputs[attrib][3] = 504 wrap(LOAD(pPS, {0, SWR_PS_CONTEXT_vOneOverW, PixelPositions_center}, "vOneOverW")); 505 continue; 506 } else if (semantic_name == TGSI_SEMANTIC_PRIMID) { 507 Value *primID = LOAD(pPS, {0, SWR_PS_CONTEXT_primID}, "primID"); 508 inputs[attrib][0] = wrap(VECTOR_SPLAT(JM()->mVWidth, primID)); 509 inputs[attrib][1] = wrap(VIMMED1(0)); 510 inputs[attrib][2] = wrap(VIMMED1(0)); 511 inputs[attrib][3] = wrap(VIMMED1(0)); 512 continue; 513 } 514 515 unsigned linkedAttrib = 516 locate_linkage(semantic_name, semantic_idx, &ctx->vs->info.base); 517 if (semantic_name == TGSI_SEMANTIC_GENERIC && 518 key.sprite_coord_enable & (1 << semantic_idx)) { 519 /* we add an extra attrib to the backendState in swr_update_derived. */ 520 linkedAttrib = ctx->vs->info.base.num_outputs - 1; 521 swr_fs->pointSpriteMask |= (1 << linkedAttrib); 522 } else if (linkedAttrib == 0xFFFFFFFF) { 523 inputs[attrib][0] = wrap(VIMMED1(0.0f)); 524 inputs[attrib][1] = wrap(VIMMED1(0.0f)); 525 inputs[attrib][2] = wrap(VIMMED1(0.0f)); 526 inputs[attrib][3] = wrap(VIMMED1(1.0f)); 527 /* If we're reading in color and 2-sided lighting is enabled, we have 528 * to keep going. 529 */ 530 if (semantic_name != TGSI_SEMANTIC_COLOR || !key.light_twoside) 531 continue; 532 } else { 533 if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 534 swr_fs->constantMask |= 1 << linkedAttrib; 535 } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 536 swr_fs->flatConstantMask |= 1 << linkedAttrib; 537 } 538 } 539 540 unsigned bcolorAttrib = 0xFFFFFFFF; 541 Value *offset = NULL; 542 if (semantic_name == TGSI_SEMANTIC_COLOR && key.light_twoside) { 543 bcolorAttrib = locate_linkage( 544 TGSI_SEMANTIC_BCOLOR, semantic_idx, &ctx->vs->info.base); 545 /* Neither front nor back colors were available. Nothing to load. */ 546 if (bcolorAttrib == 0xFFFFFFFF && linkedAttrib == 0xFFFFFFFF) 547 continue; 548 /* If there is no front color, just always use the back color. */ 549 if (linkedAttrib == 0xFFFFFFFF) 550 linkedAttrib = bcolorAttrib; 551 552 if (bcolorAttrib != 0xFFFFFFFF) { 553 if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 554 swr_fs->constantMask |= 1 << bcolorAttrib; 555 } else if (interpMode == TGSI_INTERPOLATE_COLOR) { 556 swr_fs->flatConstantMask |= 1 << bcolorAttrib; 557 } 558 559 unsigned diff = 12 * (bcolorAttrib - linkedAttrib); 560 561 if (diff) { 562 Value *back = 563 XOR(C(1), LOAD(pPS, {0, SWR_PS_CONTEXT_frontFace}), "backFace"); 564 565 offset = MUL(back, C(diff)); 566 offset->setName("offset"); 567 } 568 } 569 } 570 571 for (int channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 572 if (mask & (1 << channel)) { 573 Value *indexA = C(linkedAttrib * 12 + channel); 574 Value *indexB = C(linkedAttrib * 12 + channel + 4); 575 Value *indexC = C(linkedAttrib * 12 + channel + 8); 576 577 if (offset) { 578 indexA = ADD(indexA, offset); 579 indexB = ADD(indexB, offset); 580 indexC = ADD(indexC, offset); 581 } 582 583 Value *va = VBROADCAST(LOAD(GEP(pAttribs, indexA))); 584 Value *vb = VBROADCAST(LOAD(GEP(pAttribs, indexB))); 585 Value *vc = VBROADCAST(LOAD(GEP(pAttribs, indexC))); 586 587 if (interpMode == TGSI_INTERPOLATE_CONSTANT) { 588 inputs[attrib][channel] = wrap(va); 589 } else { 590 Value *vk = FSUB(FSUB(VIMMED1(1.0f), vi), vj); 591 592 vc = FMUL(vk, vc); 593 594 Value *interp = FMUL(va, vi); 595 Value *interp1 = FMUL(vb, vj); 596 interp = FADD(interp, interp1); 597 interp = FADD(interp, vc); 598 if (interpMode == TGSI_INTERPOLATE_PERSPECTIVE || 599 interpMode == TGSI_INTERPOLATE_COLOR) 600 interp = FMUL(interp, vw); 601 inputs[attrib][channel] = wrap(interp); 602 } 603 } 604 } 605 } 606 607 sampler = swr_sampler_soa_create(key.sampler, PIPE_SHADER_FRAGMENT); 608 609 struct lp_bld_tgsi_system_values system_values; 610 memset(&system_values, 0, sizeof(system_values)); 611 612 struct lp_build_mask_context mask; 613 614 if (swr_fs->info.base.uses_kill) { 615 Value *mask_val = LOAD(pPS, {0, SWR_PS_CONTEXT_activeMask}, "activeMask"); 616 lp_build_mask_begin( 617 &mask, gallivm, lp_type_float_vec(32, 32 * 8), wrap(mask_val)); 618 } 619 620 lp_build_tgsi_soa(gallivm, 621 swr_fs->pipe.tokens, 622 lp_type_float_vec(32, 32 * 8), 623 swr_fs->info.base.uses_kill ? &mask : NULL, // mask 624 wrap(consts_ptr), 625 wrap(const_sizes_ptr), 626 &system_values, 627 inputs, 628 outputs, 629 wrap(hPrivateData), 630 NULL, // thread data 631 sampler, // sampler 632 &swr_fs->info.base, 633 NULL); // geometry shader face 634 635 sampler->destroy(sampler); 636 637 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 638 639 for (uint32_t attrib = 0; attrib < swr_fs->info.base.num_outputs; 640 attrib++) { 641 switch (swr_fs->info.base.output_semantic_name[attrib]) { 642 case TGSI_SEMANTIC_POSITION: { 643 // write z 644 LLVMValueRef outZ = 645 LLVMBuildLoad(gallivm->builder, outputs[attrib][2], ""); 646 STORE(unwrap(outZ), pPS, {0, SWR_PS_CONTEXT_vZ}); 647 break; 648 } 649 case TGSI_SEMANTIC_COLOR: { 650 for (uint32_t channel = 0; channel < TGSI_NUM_CHANNELS; channel++) { 651 if (!outputs[attrib][channel]) 652 continue; 653 654 LLVMValueRef out = 655 LLVMBuildLoad(gallivm->builder, outputs[attrib][channel], ""); 656 if (swr_fs->info.base.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && 657 swr_fs->info.base.output_semantic_index[attrib] == 0) { 658 for (uint32_t rt = 0; rt < key.nr_cbufs; rt++) { 659 STORE(unwrap(out), 660 pPS, 661 {0, SWR_PS_CONTEXT_shaded, rt, channel}); 662 } 663 } else { 664 STORE(unwrap(out), 665 pPS, 666 {0, 667 SWR_PS_CONTEXT_shaded, 668 swr_fs->info.base.output_semantic_index[attrib], 669 channel}); 670 } 671 } 672 break; 673 } 674 default: { 675 fprintf(stderr, 676 "unknown output from FS %s[%d]\n", 677 tgsi_semantic_names[swr_fs->info.base 678 .output_semantic_name[attrib]], 679 swr_fs->info.base.output_semantic_index[attrib]); 680 break; 681 } 682 } 683 } 684 685 LLVMValueRef mask_result = 0; 686 if (swr_fs->info.base.uses_kill) { 687 mask_result = lp_build_mask_end(&mask); 688 } 689 690 IRB()->SetInsertPoint(unwrap(LLVMGetInsertBlock(gallivm->builder))); 691 692 if (swr_fs->info.base.uses_kill) { 693 STORE(unwrap(mask_result), pPS, {0, SWR_PS_CONTEXT_activeMask}); 694 } 695 696 RET_VOID(); 697 698 gallivm_verify_function(gallivm, wrap(pFunction)); 699 700 gallivm_compile_module(gallivm); 701 702 PFN_PIXEL_KERNEL kernel = 703 (PFN_PIXEL_KERNEL)gallivm_jit_function(gallivm, wrap(pFunction)); 704 debug_printf("frag shader %p\n", kernel); 705 assert(kernel && "Error: FragShader = NULL"); 706 707 JM()->mIsModuleFinalized = true; 708 709 return kernel; 710 } 711 712 PFN_PIXEL_KERNEL 713 swr_compile_fs(struct swr_context *ctx, swr_jit_fs_key &key) 714 { 715 BuilderSWR builder( 716 reinterpret_cast<JitManager *>(swr_screen(ctx->pipe.screen)->hJitMgr), 717 "FS"); 718 PFN_PIXEL_KERNEL func = builder.CompileFS(ctx, key); 719 720 ctx->fs->map.insert(std::make_pair(key, make_unique<VariantFS>(builder.gallivm, func))); 721 return func; 722 } 723