1 /* 2 * Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 * Intel funded Tungsten Graphics to 4 * develop this 3D driver. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 */ 26 #include "brw_context.h" 27 #include "brw_wm.h" 28 #include "brw_state.h" 29 #include "brw_shader.h" 30 #include "main/enums.h" 31 #include "main/formats.h" 32 #include "main/fbobject.h" 33 #include "main/samplerobj.h" 34 #include "main/framebuffer.h" 35 #include "program/prog_parameter.h" 36 #include "program/program.h" 37 #include "intel_mipmap_tree.h" 38 #include "intel_image.h" 39 #include "brw_nir.h" 40 #include "brw_program.h" 41 42 #include "util/ralloc.h" 43 44 static void 45 assign_fs_binding_table_offsets(const struct gen_device_info *devinfo, 46 const struct gl_program *prog, 47 const struct brw_wm_prog_key *key, 48 struct brw_wm_prog_data *prog_data) 49 { 50 uint32_t next_binding_table_offset = 0; 51 52 /* If there are no color regions, we still perform an FB write to a null 53 * renderbuffer, which we place at surface index 0. 54 */ 55 prog_data->binding_table.render_target_start = next_binding_table_offset; 56 next_binding_table_offset += MAX2(key->nr_color_regions, 1); 57 58 next_binding_table_offset = 59 brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base, 60 next_binding_table_offset); 61 62 if (prog->nir->info->outputs_read && !key->coherent_fb_fetch) { 63 prog_data->binding_table.render_target_read_start = 64 next_binding_table_offset; 65 next_binding_table_offset += key->nr_color_regions; 66 } 67 } 68 69 static void 70 brw_wm_debug_recompile(struct brw_context *brw, struct gl_program *prog, 71 const struct brw_wm_prog_key *key) 72 { 73 perf_debug("Recompiling fragment shader for program %d\n", prog->Id); 74 75 bool found = false; 76 const struct brw_wm_prog_key *old_key = 77 brw_find_previous_compile(&brw->cache, BRW_CACHE_FS_PROG, 78 key->program_string_id); 79 80 if (!old_key) { 81 perf_debug(" Didn't find previous compile in the shader cache for debug\n"); 82 return; 83 } 84 85 found |= key_debug(brw, "alphatest, computed depth, depth test, or " 86 "depth write", 87 old_key->iz_lookup, key->iz_lookup); 88 found |= key_debug(brw, "depth statistics", 89 old_key->stats_wm, key->stats_wm); 90 found |= key_debug(brw, "flat shading", 91 old_key->flat_shade, key->flat_shade); 92 found |= key_debug(brw, "per-sample interpolation", 93 old_key->persample_interp, key->persample_interp); 94 found |= key_debug(brw, "number of color buffers", 95 old_key->nr_color_regions, key->nr_color_regions); 96 found |= key_debug(brw, "MRT alpha test or alpha-to-coverage", 97 old_key->replicate_alpha, key->replicate_alpha); 98 found |= key_debug(brw, "fragment color clamping", 99 old_key->clamp_fragment_color, key->clamp_fragment_color); 100 found |= key_debug(brw, "multisampled FBO", 101 old_key->multisample_fbo, key->multisample_fbo); 102 found |= key_debug(brw, "line smoothing", 103 old_key->line_aa, key->line_aa); 104 found |= key_debug(brw, "input slots valid", 105 old_key->input_slots_valid, key->input_slots_valid); 106 found |= key_debug(brw, "mrt alpha test function", 107 old_key->alpha_test_func, key->alpha_test_func); 108 found |= key_debug(brw, "mrt alpha test reference value", 109 old_key->alpha_test_ref, key->alpha_test_ref); 110 111 found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex); 112 113 if (!found) { 114 perf_debug(" Something else\n"); 115 } 116 } 117 118 /** 119 * All Mesa program -> GPU code generation goes through this function. 120 * Depending on the instructions used (i.e. flow control instructions) 121 * we'll use one of two code generators. 122 */ 123 static bool 124 brw_codegen_wm_prog(struct brw_context *brw, 125 struct brw_program *fp, 126 struct brw_wm_prog_key *key, 127 struct brw_vue_map *vue_map) 128 { 129 const struct gen_device_info *devinfo = &brw->screen->devinfo; 130 struct gl_context *ctx = &brw->ctx; 131 void *mem_ctx = ralloc_context(NULL); 132 struct brw_wm_prog_data prog_data; 133 const GLuint *program; 134 GLuint program_size; 135 bool start_busy = false; 136 double start_time = 0; 137 138 memset(&prog_data, 0, sizeof(prog_data)); 139 140 /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */ 141 if (fp->program.is_arb_asm) 142 prog_data.base.use_alt_mode = true; 143 144 assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data); 145 146 /* Allocate the references to the uniforms that will end up in the 147 * prog_data associated with the compiled program, and which will be freed 148 * by the state cache. 149 */ 150 int param_count = fp->program.nir->num_uniforms / 4; 151 prog_data.base.nr_image_params = fp->program.info.num_images; 152 /* The backend also sometimes adds params for texture size. */ 153 param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; 154 prog_data.base.param = 155 rzalloc_array(NULL, const gl_constant_value *, param_count); 156 prog_data.base.pull_param = 157 rzalloc_array(NULL, const gl_constant_value *, param_count); 158 prog_data.base.image_param = 159 rzalloc_array(NULL, struct brw_image_param, 160 prog_data.base.nr_image_params); 161 prog_data.base.nr_params = param_count; 162 163 if (!fp->program.is_arb_asm) { 164 brw_nir_setup_glsl_uniforms(fp->program.nir, &fp->program, 165 &prog_data.base, true); 166 } else { 167 brw_nir_setup_arb_uniforms(fp->program.nir, &fp->program, 168 &prog_data.base); 169 170 if (unlikely(INTEL_DEBUG & DEBUG_WM)) 171 brw_dump_arb_asm("fragment", &fp->program); 172 } 173 174 if (unlikely(brw->perf_debug)) { 175 start_busy = (brw->batch.last_bo && 176 drm_intel_bo_busy(brw->batch.last_bo)); 177 start_time = get_time(); 178 } 179 180 int st_index8 = -1, st_index16 = -1; 181 if (INTEL_DEBUG & DEBUG_SHADER_TIME) { 182 st_index8 = brw_get_shader_time_index(brw, &fp->program, ST_FS8, 183 !fp->program.is_arb_asm); 184 st_index16 = brw_get_shader_time_index(brw, &fp->program, ST_FS16, 185 !fp->program.is_arb_asm); 186 } 187 188 char *error_str = NULL; 189 program = brw_compile_fs(brw->screen->compiler, brw, mem_ctx, 190 key, &prog_data, fp->program.nir, 191 &fp->program, st_index8, st_index16, 192 true, brw->use_rep_send, vue_map, 193 &program_size, &error_str); 194 195 if (program == NULL) { 196 if (!fp->program.is_arb_asm) { 197 fp->program.sh.data->LinkStatus = false; 198 ralloc_strcat(&fp->program.sh.data->InfoLog, error_str); 199 } 200 201 _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", error_str); 202 203 ralloc_free(mem_ctx); 204 return false; 205 } 206 207 if (unlikely(brw->perf_debug)) { 208 if (fp->compiled_once) 209 brw_wm_debug_recompile(brw, &fp->program, key); 210 fp->compiled_once = true; 211 212 if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) { 213 perf_debug("FS compile took %.03f ms and stalled the GPU\n", 214 (get_time() - start_time) * 1000); 215 } 216 } 217 218 brw_alloc_stage_scratch(brw, &brw->wm.base, 219 prog_data.base.total_scratch, 220 devinfo->max_wm_threads); 221 222 if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm)) 223 fprintf(stderr, "\n"); 224 225 brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG, 226 key, sizeof(struct brw_wm_prog_key), 227 program, program_size, 228 &prog_data, sizeof(prog_data), 229 &brw->wm.base.prog_offset, &brw->wm.base.prog_data); 230 231 ralloc_free(mem_ctx); 232 233 return true; 234 } 235 236 bool 237 brw_debug_recompile_sampler_key(struct brw_context *brw, 238 const struct brw_sampler_prog_key_data *old_key, 239 const struct brw_sampler_prog_key_data *key) 240 { 241 bool found = false; 242 243 for (unsigned int i = 0; i < MAX_SAMPLERS; i++) { 244 found |= key_debug(brw, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE", 245 old_key->swizzles[i], key->swizzles[i]); 246 } 247 found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 1st coordinate", 248 old_key->gl_clamp_mask[0], key->gl_clamp_mask[0]); 249 found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 2nd coordinate", 250 old_key->gl_clamp_mask[1], key->gl_clamp_mask[1]); 251 found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 3rd coordinate", 252 old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]); 253 found |= key_debug(brw, "gather channel quirk on any texture unit", 254 old_key->gather_channel_quirk_mask, key->gather_channel_quirk_mask); 255 found |= key_debug(brw, "compressed multisample layout", 256 old_key->compressed_multisample_layout_mask, 257 key->compressed_multisample_layout_mask); 258 found |= key_debug(brw, "16x msaa", 259 old_key->msaa_16, 260 key->msaa_16); 261 262 found |= key_debug(brw, "y_uv image bound", 263 old_key->y_uv_image_mask, 264 key->y_uv_image_mask); 265 found |= key_debug(brw, "y_u_v image bound", 266 old_key->y_u_v_image_mask, 267 key->y_u_v_image_mask); 268 found |= key_debug(brw, "yx_xuxv image bound", 269 old_key->yx_xuxv_image_mask, 270 key->yx_xuxv_image_mask); 271 272 for (unsigned int i = 0; i < MAX_SAMPLERS; i++) { 273 found |= key_debug(brw, "textureGather workarounds", 274 old_key->gen6_gather_wa[i], key->gen6_gather_wa[i]); 275 } 276 277 return found; 278 } 279 280 static uint8_t 281 gen6_gather_workaround(GLenum internalformat) 282 { 283 switch (internalformat) { 284 case GL_R8I: return WA_SIGN | WA_8BIT; 285 case GL_R8UI: return WA_8BIT; 286 case GL_R16I: return WA_SIGN | WA_16BIT; 287 case GL_R16UI: return WA_16BIT; 288 default: 289 /* Note that even though GL_R32I and GL_R32UI have format overrides in 290 * the surface state, there is no shader w/a required. 291 */ 292 return 0; 293 } 294 } 295 296 void 297 brw_populate_sampler_prog_key_data(struct gl_context *ctx, 298 const struct gl_program *prog, 299 struct brw_sampler_prog_key_data *key) 300 { 301 struct brw_context *brw = brw_context(ctx); 302 GLbitfield mask = prog->SamplersUsed; 303 304 while (mask) { 305 const int s = u_bit_scan(&mask); 306 307 key->swizzles[s] = SWIZZLE_NOOP; 308 309 int unit_id = prog->SamplerUnits[s]; 310 const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id]; 311 312 if (unit->_Current && unit->_Current->Target != GL_TEXTURE_BUFFER) { 313 const struct gl_texture_object *t = unit->_Current; 314 const struct gl_texture_image *img = t->Image[0][t->BaseLevel]; 315 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id); 316 317 const bool alpha_depth = t->DepthMode == GL_ALPHA && 318 (img->_BaseFormat == GL_DEPTH_COMPONENT || 319 img->_BaseFormat == GL_DEPTH_STENCIL); 320 321 /* Haswell handles texture swizzling as surface format overrides 322 * (except for GL_ALPHA); all other platforms need MOVs in the shader. 323 */ 324 if (alpha_depth || (brw->gen < 8 && !brw->is_haswell)) 325 key->swizzles[s] = brw_get_texture_swizzle(ctx, t); 326 327 if (brw->gen < 8 && 328 sampler->MinFilter != GL_NEAREST && 329 sampler->MagFilter != GL_NEAREST) { 330 if (sampler->WrapS == GL_CLAMP) 331 key->gl_clamp_mask[0] |= 1 << s; 332 if (sampler->WrapT == GL_CLAMP) 333 key->gl_clamp_mask[1] |= 1 << s; 334 if (sampler->WrapR == GL_CLAMP) 335 key->gl_clamp_mask[2] |= 1 << s; 336 } 337 338 /* gather4 for RG32* is broken in multiple ways on Gen7. */ 339 if (brw->gen == 7 && prog->nir->info->uses_texture_gather) { 340 switch (img->InternalFormat) { 341 case GL_RG32I: 342 case GL_RG32UI: { 343 /* We have to override the format to R32G32_FLOAT_LD. 344 * This means that SCS_ALPHA and SCS_ONE will return 0x3f8 345 * (1.0) rather than integer 1. This needs shader hacks. 346 * 347 * On Ivybridge, we whack W (alpha) to ONE in our key's 348 * swizzle. On Haswell, we look at the original texture 349 * swizzle, and use XYZW with channels overridden to ONE, 350 * leaving normal texture swizzling to SCS. 351 */ 352 unsigned src_swizzle = 353 brw->is_haswell ? t->_Swizzle : key->swizzles[s]; 354 for (int i = 0; i < 4; i++) { 355 unsigned src_comp = GET_SWZ(src_swizzle, i); 356 if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) { 357 key->swizzles[i] &= ~(0x7 << (3 * i)); 358 key->swizzles[i] |= SWIZZLE_ONE << (3 * i); 359 } 360 } 361 /* fallthrough */ 362 } 363 case GL_RG32F: 364 /* The channel select for green doesn't work - we have to 365 * request blue. Haswell can use SCS for this, but Ivybridge 366 * needs a shader workaround. 367 */ 368 if (!brw->is_haswell) 369 key->gather_channel_quirk_mask |= 1 << s; 370 break; 371 } 372 } 373 374 /* Gen6's gather4 is broken for UINT/SINT; we treat them as 375 * UNORM/FLOAT instead and fix it in the shader. 376 */ 377 if (brw->gen == 6 && prog->nir->info->uses_texture_gather) { 378 key->gen6_gather_wa[s] = gen6_gather_workaround(img->InternalFormat); 379 } 380 381 /* If this is a multisample sampler, and uses the CMS MSAA layout, 382 * then we need to emit slightly different code to first sample the 383 * MCS surface. 384 */ 385 struct intel_texture_object *intel_tex = 386 intel_texture_object((struct gl_texture_object *)t); 387 388 /* From gen9 onwards some single sampled buffers can also be 389 * compressed. These don't need ld2dms sampling along with mcs fetch. 390 */ 391 if (brw->gen >= 7 && 392 intel_tex->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS && 393 intel_tex->mt->num_samples > 1) { 394 key->compressed_multisample_layout_mask |= 1 << s; 395 396 if (intel_tex->mt->num_samples >= 16) { 397 assert(brw->gen >= 9); 398 key->msaa_16 |= 1 << s; 399 } 400 } 401 402 if (t->Target == GL_TEXTURE_EXTERNAL_OES && intel_tex->planar_format) { 403 switch (intel_tex->planar_format->components) { 404 case __DRI_IMAGE_COMPONENTS_Y_UV: 405 key->y_uv_image_mask |= 1 << s; 406 break; 407 case __DRI_IMAGE_COMPONENTS_Y_U_V: 408 key->y_u_v_image_mask |= 1 << s; 409 break; 410 case __DRI_IMAGE_COMPONENTS_Y_XUXV: 411 key->yx_xuxv_image_mask |= 1 << s; 412 break; 413 default: 414 break; 415 } 416 } 417 418 } 419 } 420 } 421 422 static bool 423 brw_wm_state_dirty(const struct brw_context *brw) 424 { 425 return brw_state_dirty(brw, 426 _NEW_BUFFERS | 427 _NEW_COLOR | 428 _NEW_DEPTH | 429 _NEW_FRAG_CLAMP | 430 _NEW_HINT | 431 _NEW_LIGHT | 432 _NEW_LINE | 433 _NEW_MULTISAMPLE | 434 _NEW_POLYGON | 435 _NEW_STENCIL | 436 _NEW_TEXTURE, 437 BRW_NEW_FRAGMENT_PROGRAM | 438 BRW_NEW_REDUCED_PRIMITIVE | 439 BRW_NEW_STATS_WM | 440 BRW_NEW_VUE_MAP_GEOM_OUT); 441 } 442 443 void 444 brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) 445 { 446 struct gl_context *ctx = &brw->ctx; 447 /* BRW_NEW_FRAGMENT_PROGRAM */ 448 const struct brw_program *fp = brw_program_const(brw->fragment_program); 449 const struct gl_program *prog = (struct gl_program *) brw->fragment_program; 450 GLuint lookup = 0; 451 GLuint line_aa; 452 453 memset(key, 0, sizeof(*key)); 454 455 /* Build the index for table lookup 456 */ 457 if (brw->gen < 6) { 458 /* _NEW_COLOR */ 459 if (prog->info.fs.uses_discard || ctx->Color.AlphaEnabled) { 460 lookup |= IZ_PS_KILL_ALPHATEST_BIT; 461 } 462 463 if (prog->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { 464 lookup |= IZ_PS_COMPUTES_DEPTH_BIT; 465 } 466 467 /* _NEW_DEPTH */ 468 if (ctx->Depth.Test) 469 lookup |= IZ_DEPTH_TEST_ENABLE_BIT; 470 471 if (brw_depth_writes_enabled(brw)) 472 lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; 473 474 /* _NEW_STENCIL | _NEW_BUFFERS */ 475 if (ctx->Stencil._Enabled) { 476 lookup |= IZ_STENCIL_TEST_ENABLE_BIT; 477 478 if (ctx->Stencil.WriteMask[0] || 479 ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) 480 lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; 481 } 482 key->iz_lookup = lookup; 483 } 484 485 line_aa = AA_NEVER; 486 487 /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ 488 if (ctx->Line.SmoothFlag) { 489 if (brw->reduced_primitive == GL_LINES) { 490 line_aa = AA_ALWAYS; 491 } 492 else if (brw->reduced_primitive == GL_TRIANGLES) { 493 if (ctx->Polygon.FrontMode == GL_LINE) { 494 line_aa = AA_SOMETIMES; 495 496 if (ctx->Polygon.BackMode == GL_LINE || 497 (ctx->Polygon.CullFlag && 498 ctx->Polygon.CullFaceMode == GL_BACK)) 499 line_aa = AA_ALWAYS; 500 } 501 else if (ctx->Polygon.BackMode == GL_LINE) { 502 line_aa = AA_SOMETIMES; 503 504 if ((ctx->Polygon.CullFlag && 505 ctx->Polygon.CullFaceMode == GL_FRONT)) 506 line_aa = AA_ALWAYS; 507 } 508 } 509 } 510 511 key->line_aa = line_aa; 512 513 /* _NEW_HINT */ 514 key->high_quality_derivatives = 515 ctx->Hint.FragmentShaderDerivative == GL_NICEST; 516 517 if (brw->gen < 6) 518 key->stats_wm = brw->stats_wm; 519 520 /* _NEW_LIGHT */ 521 key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); 522 523 /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ 524 key->clamp_fragment_color = ctx->Color._ClampFragmentColor; 525 526 /* _NEW_TEXTURE */ 527 brw_populate_sampler_prog_key_data(ctx, prog, &key->tex); 528 529 /* _NEW_BUFFERS */ 530 key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; 531 532 /* _NEW_COLOR */ 533 key->force_dual_color_blend = brw->dual_color_blend_by_location && 534 (ctx->Color.BlendEnabled & 1) && ctx->Color.Blend[0]._UsesDualSrc; 535 536 /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ 537 key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && 538 (_mesa_is_alpha_test_enabled(ctx) || 539 _mesa_is_alpha_to_coverage_enabled(ctx)); 540 541 /* _NEW_BUFFERS _NEW_MULTISAMPLE */ 542 /* Ignore sample qualifier while computing this flag. */ 543 if (ctx->Multisample.Enabled) { 544 key->persample_interp = 545 ctx->Multisample.SampleShading && 546 (ctx->Multisample.MinSampleShadingValue * 547 _mesa_geometric_samples(ctx->DrawBuffer) > 1); 548 549 key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1; 550 } 551 552 /* BRW_NEW_VUE_MAP_GEOM_OUT */ 553 if (brw->gen < 6 || _mesa_bitcount_64(prog->info.inputs_read & 554 BRW_FS_VARYING_INPUT_MASK) > 16) { 555 key->input_slots_valid = brw->vue_map_geom_out.slots_valid; 556 } 557 558 /* _NEW_COLOR | _NEW_BUFFERS */ 559 /* Pre-gen6, the hardware alpha test always used each render 560 * target's alpha to do alpha test, as opposed to render target 0's alpha 561 * like GL requires. Fix that by building the alpha test into the 562 * shader, and we'll skip enabling the fixed function alpha test. 563 */ 564 if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && 565 ctx->Color.AlphaEnabled) { 566 key->alpha_test_func = ctx->Color.AlphaFunc; 567 key->alpha_test_ref = ctx->Color.AlphaRef; 568 } 569 570 /* The unique fragment program ID */ 571 key->program_string_id = fp->id; 572 573 /* Whether reads from the framebuffer should behave coherently. */ 574 key->coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch; 575 } 576 577 void 578 brw_upload_wm_prog(struct brw_context *brw) 579 { 580 struct brw_wm_prog_key key; 581 struct brw_program *fp = (struct brw_program *) brw->fragment_program; 582 583 if (!brw_wm_state_dirty(brw)) 584 return; 585 586 brw_wm_populate_key(brw, &key); 587 588 if (!brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG, 589 &key, sizeof(key), 590 &brw->wm.base.prog_offset, 591 &brw->wm.base.prog_data)) { 592 bool success = brw_codegen_wm_prog(brw, fp, &key, 593 &brw->vue_map_geom_out); 594 (void) success; 595 assert(success); 596 } 597 } 598 599 bool 600 brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog) 601 { 602 struct brw_context *brw = brw_context(ctx); 603 struct brw_wm_prog_key key; 604 605 struct brw_program *bfp = brw_program(prog); 606 607 memset(&key, 0, sizeof(key)); 608 609 uint64_t outputs_written = prog->info.outputs_written; 610 611 if (brw->gen < 6) { 612 if (prog->info.fs.uses_discard) 613 key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT; 614 615 if (outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) 616 key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT; 617 618 /* Just assume depth testing. */ 619 key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT; 620 key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; 621 } 622 623 if (brw->gen < 6 || _mesa_bitcount_64(prog->info.inputs_read & 624 BRW_FS_VARYING_INPUT_MASK) > 16) { 625 key.input_slots_valid = prog->info.inputs_read | VARYING_BIT_POS; 626 } 627 628 brw_setup_tex_for_precompile(brw, &key.tex, prog); 629 630 key.nr_color_regions = _mesa_bitcount_64(outputs_written & 631 ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | 632 BITFIELD64_BIT(FRAG_RESULT_STENCIL) | 633 BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))); 634 635 key.program_string_id = bfp->id; 636 637 /* Whether reads from the framebuffer should behave coherently. */ 638 key.coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch; 639 640 uint32_t old_prog_offset = brw->wm.base.prog_offset; 641 struct brw_stage_prog_data *old_prog_data = brw->wm.base.prog_data; 642 643 struct brw_vue_map vue_map; 644 if (brw->gen < 6) { 645 brw_compute_vue_map(&brw->screen->devinfo, &vue_map, 646 prog->info.inputs_read | VARYING_BIT_POS, 647 false); 648 } 649 650 bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map); 651 652 brw->wm.base.prog_offset = old_prog_offset; 653 brw->wm.base.prog_data = old_prog_data; 654 655 return success; 656 } 657