/*
 * Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 * Intel funded Tungsten Graphics to
 * develop this 3D driver.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_state.h"
#include "brw_shader.h"
#include "main/enums.h"
#include "main/formats.h"
#include "main/fbobject.h"
#include "main/samplerobj.h"
#include "main/framebuffer.h"
#include "program/prog_parameter.h"
#include "program/program.h"
#include "intel_mipmap_tree.h"
#include "intel_image.h"
#include "brw_nir.h"
#include "brw_program.h"

#include "util/ralloc.h"

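/* Lay out the fragment shader's binding table.  Render targets come first
 * (at least one slot, since even with no color regions we still do an FB
 * write to a null renderbuffer), then the surfaces shared with the other
 * stages (handed out by brw_assign_common_binding_table_offsets()), and
 * finally, for non-coherent framebuffer fetch, one render-target-read
 * surface per color region.
 */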
static void
assign_fs_binding_table_offsets(const struct gen_device_info *devinfo,
                                const struct gl_program *prog,
                                const struct brw_wm_prog_key *key,
                                struct brw_wm_prog_data *prog_data)
{
   uint32_t next_binding_table_offset = 0;

   /* If there are no color regions, we still perform an FB write to a null
    * renderbuffer, which we place at surface index 0.
    */
   prog_data->binding_table.render_target_start = next_binding_table_offset;
   next_binding_table_offset += MAX2(key->nr_color_regions, 1);

   next_binding_table_offset =
      brw_assign_common_binding_table_offsets(devinfo, prog, &prog_data->base,
                                              next_binding_table_offset);

   if (prog->nir->info->outputs_read && !key->coherent_fb_fetch) {
      prog_data->binding_table.render_target_read_start =
         next_binding_table_offset;
      next_binding_table_offset += key->nr_color_regions;
   }
}

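/* Report (via perf_debug) which program key fields changed since the last
 * compile of this program, to help explain why the recompile happened.
 */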
static void
brw_wm_debug_recompile(struct brw_context *brw, struct gl_program *prog,
                       const struct brw_wm_prog_key *key)
{
   perf_debug("Recompiling fragment shader for program %d\n", prog->Id);

   bool found = false;
   const struct brw_wm_prog_key *old_key =
      brw_find_previous_compile(&brw->cache, BRW_CACHE_FS_PROG,
                                key->program_string_id);

   if (!old_key) {
      perf_debug("  Didn't find previous compile in the shader cache for debug\n");
      return;
   }

   found |= key_debug(brw, "alphatest, computed depth, depth test, or "
                      "depth write",
                      old_key->iz_lookup, key->iz_lookup);
   found |= key_debug(brw, "depth statistics",
                      old_key->stats_wm, key->stats_wm);
   found |= key_debug(brw, "flat shading",
                      old_key->flat_shade, key->flat_shade);
   found |= key_debug(brw, "per-sample interpolation",
                      old_key->persample_interp, key->persample_interp);
   found |= key_debug(brw, "number of color buffers",
                      old_key->nr_color_regions, key->nr_color_regions);
   found |= key_debug(brw, "MRT alpha test or alpha-to-coverage",
                      old_key->replicate_alpha, key->replicate_alpha);
   found |= key_debug(brw, "fragment color clamping",
                      old_key->clamp_fragment_color, key->clamp_fragment_color);
   found |= key_debug(brw, "multisampled FBO",
                      old_key->multisample_fbo, key->multisample_fbo);
   found |= key_debug(brw, "line smoothing",
                      old_key->line_aa, key->line_aa);
   found |= key_debug(brw, "input slots valid",
                      old_key->input_slots_valid, key->input_slots_valid);
   found |= key_debug(brw, "mrt alpha test function",
                      old_key->alpha_test_func, key->alpha_test_func);
   found |= key_debug(brw, "mrt alpha test reference value",
                      old_key->alpha_test_ref, key->alpha_test_ref);

   found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);

   if (!found) {
      perf_debug("  Something else\n");
   }
}

/**
 * All Mesa program -> GPU code generation goes through this function.
 * Depending on the instructions used (i.e. flow control instructions)
 * we'll use one of two code generators.
 */
static bool
brw_codegen_wm_prog(struct brw_context *brw,
                    struct brw_program *fp,
                    struct brw_wm_prog_key *key,
                    struct brw_vue_map *vue_map)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;
   void *mem_ctx = ralloc_context(NULL);
   struct brw_wm_prog_data prog_data;
   const GLuint *program;
   GLuint program_size;
   bool start_busy = false;
   double start_time = 0;

   memset(&prog_data, 0, sizeof(prog_data));

   /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
   if (fp->program.is_arb_asm)
      prog_data.base.use_alt_mode = true;

   assign_fs_binding_table_offsets(devinfo, &fp->program, key, &prog_data);

   /* Allocate the references to the uniforms that will end up in the
    * prog_data associated with the compiled program, and which will be freed
    * by the state cache.
    */
   int param_count = fp->program.nir->num_uniforms / 4;
   prog_data.base.nr_image_params = fp->program.info.num_images;
   /* The backend also sometimes adds params for texture size. */
   param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
   prog_data.base.param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   prog_data.base.pull_param =
      rzalloc_array(NULL, const gl_constant_value *, param_count);
   prog_data.base.image_param =
      rzalloc_array(NULL, struct brw_image_param,
                    prog_data.base.nr_image_params);
   prog_data.base.nr_params = param_count;

   if (!fp->program.is_arb_asm) {
      brw_nir_setup_glsl_uniforms(fp->program.nir, &fp->program,
                                  &prog_data.base, true);
   } else {
      brw_nir_setup_arb_uniforms(fp->program.nir, &fp->program,
                                 &prog_data.base);

      if (unlikely(INTEL_DEBUG & DEBUG_WM))
         brw_dump_arb_asm("fragment", &fp->program);
   }

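   /* Remember whether the GPU was already busy before we start compiling,
    * so the perf_debug check after the compile can tell whether this
    * compile stalled the GPU.
    */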
   if (unlikely(brw->perf_debug)) {
      start_busy = (brw->batch.last_bo &&
                    drm_intel_bo_busy(brw->batch.last_bo));
      start_time = get_time();
   }

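   /* For INTEL_DEBUG=shader_time, the SIMD8 and SIMD16 variants of this
    * fragment shader accumulate their timing into separate slots, so grab
    * an index for each.
    */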
   int st_index8 = -1, st_index16 = -1;
   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      st_index8 = brw_get_shader_time_index(brw, &fp->program, ST_FS8,
                                            !fp->program.is_arb_asm);
      st_index16 = brw_get_shader_time_index(brw, &fp->program, ST_FS16,
                                             !fp->program.is_arb_asm);
   }

   char *error_str = NULL;
   program = brw_compile_fs(brw->screen->compiler, brw, mem_ctx,
                            key, &prog_data, fp->program.nir,
                            &fp->program, st_index8, st_index16,
                            true, brw->use_rep_send, vue_map,
                            &program_size, &error_str);

   if (program == NULL) {
      if (!fp->program.is_arb_asm) {
         fp->program.sh.data->LinkStatus = false;
         ralloc_strcat(&fp->program.sh.data->InfoLog, error_str);
      }

      _mesa_problem(NULL, "Failed to compile fragment shader: %s\n", error_str);

      ralloc_free(mem_ctx);
      return false;
   }

   if (unlikely(brw->perf_debug)) {
      if (fp->compiled_once)
         brw_wm_debug_recompile(brw, &fp->program, key);
      fp->compiled_once = true;

      if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
         perf_debug("FS compile took %.03f ms and stalled the GPU\n",
                    (get_time() - start_time) * 1000);
      }
   }

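   /* Scratch space for register spilling is sized for the worst case: the
    * per-thread amount the compiler asked for times the maximum number of
    * WM threads the device can run.
    */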
   brw_alloc_stage_scratch(brw, &brw->wm.base,
                           prog_data.base.total_scratch,
                           devinfo->max_wm_threads);

   if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm))
      fprintf(stderr, "\n");

   brw_upload_cache(&brw->cache, BRW_CACHE_FS_PROG,
                    key, sizeof(struct brw_wm_prog_key),
                    program, program_size,
                    &prog_data, sizeof(prog_data),
                    &brw->wm.base.prog_offset, &brw->wm.base.prog_data);

   ralloc_free(mem_ctx);

   return true;
}

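/* Compare the texture/sampler portions of two program keys and perf_debug
 * any differences; returns true if anything changed.
 */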
bool
brw_debug_recompile_sampler_key(struct brw_context *brw,
                                const struct brw_sampler_prog_key_data *old_key,
                                const struct brw_sampler_prog_key_data *key)
{
   bool found = false;

   for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
      found |= key_debug(brw, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
                         old_key->swizzles[i], key->swizzles[i]);
   }
   found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 1st coordinate",
                      old_key->gl_clamp_mask[0], key->gl_clamp_mask[0]);
   found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 2nd coordinate",
                      old_key->gl_clamp_mask[1], key->gl_clamp_mask[1]);
   found |= key_debug(brw, "GL_CLAMP enabled on any texture unit's 3rd coordinate",
                      old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]);
   found |= key_debug(brw, "gather channel quirk on any texture unit",
                      old_key->gather_channel_quirk_mask, key->gather_channel_quirk_mask);
   found |= key_debug(brw, "compressed multisample layout",
                      old_key->compressed_multisample_layout_mask,
                      key->compressed_multisample_layout_mask);
   found |= key_debug(brw, "16x msaa",
                      old_key->msaa_16,
                      key->msaa_16);

   found |= key_debug(brw, "y_uv image bound",
                      old_key->y_uv_image_mask,
                      key->y_uv_image_mask);
   found |= key_debug(brw, "y_u_v image bound",
                      old_key->y_u_v_image_mask,
                      key->y_u_v_image_mask);
   found |= key_debug(brw, "yx_xuxv image bound",
                      old_key->yx_xuxv_image_mask,
                      key->yx_xuxv_image_mask);

   for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
      found |= key_debug(brw, "textureGather workarounds",
                         old_key->gen6_gather_wa[i], key->gen6_gather_wa[i]);
   }

   return found;
}

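/* Figure out the gather4 workaround needed for a given format on Gen6,
 * where gather4 of UINT/SINT formats is broken: the texture is sampled as
 * UNORM/FLOAT instead, and the WA_* flags tell the shader how to convert
 * the result back (see the gen6_gather_wa use below).
 */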
static uint8_t
gen6_gather_workaround(GLenum internalformat)
{
   switch (internalformat) {
   case GL_R8I: return WA_SIGN | WA_8BIT;
   case GL_R8UI: return WA_8BIT;
   case GL_R16I: return WA_SIGN | WA_16BIT;
   case GL_R16UI: return WA_16BIT;
   default:
      /* Note that even though GL_R32I and GL_R32UI have format overrides in
       * the surface state, there is no shader w/a required.
       */
      return 0;
   }
}

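/* Gather the per-sampler state (swizzles, wrap modes, formats, MSAA layout,
 * planar YUV formats) that affects the compiled shader code, so it becomes
 * part of the program key.
 */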
void
brw_populate_sampler_prog_key_data(struct gl_context *ctx,
                                   const struct gl_program *prog,
                                   struct brw_sampler_prog_key_data *key)
{
   struct brw_context *brw = brw_context(ctx);
   GLbitfield mask = prog->SamplersUsed;

   while (mask) {
      const int s = u_bit_scan(&mask);

      key->swizzles[s] = SWIZZLE_NOOP;

      int unit_id = prog->SamplerUnits[s];
      const struct gl_texture_unit *unit = &ctx->Texture.Unit[unit_id];

      if (unit->_Current && unit->_Current->Target != GL_TEXTURE_BUFFER) {
         const struct gl_texture_object *t = unit->_Current;
         const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
         struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit_id);

         const bool alpha_depth = t->DepthMode == GL_ALPHA &&
            (img->_BaseFormat == GL_DEPTH_COMPONENT ||
             img->_BaseFormat == GL_DEPTH_STENCIL);

         /* Haswell and Gen8+ handle texture swizzling as surface format
          * overrides (except for the GL_ALPHA depth-mode case above);
          * earlier platforms need MOVs in the shader.
          */
         if (alpha_depth || (brw->gen < 8 && !brw->is_haswell))
            key->swizzles[s] = brw_get_texture_swizzle(ctx, t);

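         /* Pre-Gen8 hardware has no wrap mode matching GL_CLAMP when the
          * sampler uses linear filtering, so record which coordinates need
          * to be clamped in the shader instead.
          */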
         if (brw->gen < 8 &&
             sampler->MinFilter != GL_NEAREST &&
             sampler->MagFilter != GL_NEAREST) {
            if (sampler->WrapS == GL_CLAMP)
               key->gl_clamp_mask[0] |= 1 << s;
            if (sampler->WrapT == GL_CLAMP)
               key->gl_clamp_mask[1] |= 1 << s;
            if (sampler->WrapR == GL_CLAMP)
               key->gl_clamp_mask[2] |= 1 << s;
         }

         /* gather4 for RG32* is broken in multiple ways on Gen7. */
         if (brw->gen == 7 && prog->nir->info->uses_texture_gather) {
            switch (img->InternalFormat) {
            case GL_RG32I:
            case GL_RG32UI: {
               /* We have to override the format to R32G32_FLOAT_LD.
                * This means that SCS_ALPHA and SCS_ONE will return 0x3f8
                * (1.0) rather than integer 1.  This needs shader hacks.
                *
                * On Ivybridge, we whack W (alpha) to ONE in our key's
                * swizzle.  On Haswell, we look at the original texture
                * swizzle, and use XYZW with channels overridden to ONE,
                * leaving normal texture swizzling to SCS.
                */
               unsigned src_swizzle =
                  brw->is_haswell ? t->_Swizzle : key->swizzles[s];
               for (int i = 0; i < 4; i++) {
                  unsigned src_comp = GET_SWZ(src_swizzle, i);
                  if (src_comp == SWIZZLE_ONE || src_comp == SWIZZLE_W) {
                     key->swizzles[s] &= ~(0x7 << (3 * i));
                     key->swizzles[s] |= SWIZZLE_ONE << (3 * i);
                  }
               }
               /* fallthrough */
            }
            case GL_RG32F:
               /* The channel select for green doesn't work - we have to
                * request blue.  Haswell can use SCS for this, but Ivybridge
                * needs a shader workaround.
                */
               if (!brw->is_haswell)
                  key->gather_channel_quirk_mask |= 1 << s;
               break;
            }
         }

         /* Gen6's gather4 is broken for UINT/SINT; we treat them as
          * UNORM/FLOAT instead and fix it in the shader.
          */
         if (brw->gen == 6 && prog->nir->info->uses_texture_gather) {
            key->gen6_gather_wa[s] = gen6_gather_workaround(img->InternalFormat);
         }

         /* If this is a multisample sampler, and uses the CMS MSAA layout,
          * then we need to emit slightly different code to first sample the
          * MCS surface.
          */
         struct intel_texture_object *intel_tex =
            intel_texture_object((struct gl_texture_object *)t);

         /* From Gen9 onwards some single-sampled buffers can also be
          * compressed.  These don't need ld2dms sampling along with an MCS
          * fetch.
          */
         if (brw->gen >= 7 &&
             intel_tex->mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS &&
             intel_tex->mt->num_samples > 1) {
            key->compressed_multisample_layout_mask |= 1 << s;

            if (intel_tex->mt->num_samples >= 16) {
               assert(brw->gen >= 9);
               key->msaa_16 |= 1 << s;
            }
         }

         if (t->Target == GL_TEXTURE_EXTERNAL_OES && intel_tex->planar_format) {
            switch (intel_tex->planar_format->components) {
            case __DRI_IMAGE_COMPONENTS_Y_UV:
               key->y_uv_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_Y_U_V:
               key->y_u_v_image_mask |= 1 << s;
               break;
            case __DRI_IMAGE_COMPONENTS_Y_XUXV:
               key->yx_xuxv_image_mask |= 1 << s;
               break;
            default:
               break;
            }
         }

      }
   }
}

static bool
brw_wm_state_dirty(const struct brw_context *brw)
{
   return brw_state_dirty(brw,
                          _NEW_BUFFERS |
                          _NEW_COLOR |
                          _NEW_DEPTH |
                          _NEW_FRAG_CLAMP |
                          _NEW_HINT |
                          _NEW_LIGHT |
                          _NEW_LINE |
                          _NEW_MULTISAMPLE |
                          _NEW_POLYGON |
                          _NEW_STENCIL |
                          _NEW_TEXTURE,
                          BRW_NEW_FRAGMENT_PROGRAM |
                          BRW_NEW_REDUCED_PRIMITIVE |
                          BRW_NEW_STATS_WM |
                          BRW_NEW_VUE_MAP_GEOM_OUT);
}

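/* Build the fragment program key from the current GL state.  Each block
 * below is tagged with the _NEW_* / BRW_NEW_* dirty bits it depends on;
 * those bits must also be listed in brw_wm_state_dirty() above.
 */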
void
brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   const struct brw_program *fp = brw_program_const(brw->fragment_program);
   const struct gl_program *prog = (struct gl_program *) brw->fragment_program;
   GLuint lookup = 0;
   GLuint line_aa;

   memset(key, 0, sizeof(*key));

   /* Build the index for table lookup. */
   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (prog->info.fs.uses_discard || ctx->Color.AlphaEnabled) {
         lookup |= IZ_PS_KILL_ALPHATEST_BIT;
      }

      if (prog->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
         lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
      }

      /* _NEW_DEPTH */
      if (ctx->Depth.Test)
         lookup |= IZ_DEPTH_TEST_ENABLE_BIT;

      if (brw_depth_writes_enabled(brw))
         lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;

      /* _NEW_STENCIL | _NEW_BUFFERS */
      if (ctx->Stencil._Enabled) {
         lookup |= IZ_STENCIL_TEST_ENABLE_BIT;

         if (ctx->Stencil.WriteMask[0] ||
             ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
            lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
      }
      key->iz_lookup = lookup;
   }

   line_aa = AA_NEVER;

   /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
   if (ctx->Line.SmoothFlag) {
      if (brw->reduced_primitive == GL_LINES) {
         line_aa = AA_ALWAYS;
      }
      else if (brw->reduced_primitive == GL_TRIANGLES) {
         if (ctx->Polygon.FrontMode == GL_LINE) {
            line_aa = AA_SOMETIMES;

            if (ctx->Polygon.BackMode == GL_LINE ||
                (ctx->Polygon.CullFlag &&
                 ctx->Polygon.CullFaceMode == GL_BACK))
               line_aa = AA_ALWAYS;
         }
         else if (ctx->Polygon.BackMode == GL_LINE) {
            line_aa = AA_SOMETIMES;

            if ((ctx->Polygon.CullFlag &&
                 ctx->Polygon.CullFaceMode == GL_FRONT))
               line_aa = AA_ALWAYS;
         }
      }
   }

   key->line_aa = line_aa;

   /* _NEW_HINT */
   key->high_quality_derivatives =
      ctx->Hint.FragmentShaderDerivative == GL_NICEST;

   if (brw->gen < 6)
      key->stats_wm = brw->stats_wm;

   /* _NEW_LIGHT */
   key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);

   /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
   key->clamp_fragment_color = ctx->Color._ClampFragmentColor;

   /* _NEW_TEXTURE */
   brw_populate_sampler_prog_key_data(ctx, prog, &key->tex);

   /* _NEW_BUFFERS */
   key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;

   /* _NEW_COLOR */
   key->force_dual_color_blend = brw->dual_color_blend_by_location &&
      (ctx->Color.BlendEnabled & 1) && ctx->Color.Blend[0]._UsesDualSrc;

   /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
   key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
      (_mesa_is_alpha_test_enabled(ctx) ||
       _mesa_is_alpha_to_coverage_enabled(ctx));

   /* _NEW_BUFFERS _NEW_MULTISAMPLE */
   /* Ignore sample qualifier while computing this flag. */
   if (ctx->Multisample.Enabled) {
      key->persample_interp =
         ctx->Multisample.SampleShading &&
         (ctx->Multisample.MinSampleShadingValue *
          _mesa_geometric_samples(ctx->DrawBuffer) > 1);

      key->multisample_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
   }

   /* BRW_NEW_VUE_MAP_GEOM_OUT */
   if (brw->gen < 6 || _mesa_bitcount_64(prog->info.inputs_read &
                                         BRW_FS_VARYING_INPUT_MASK) > 16) {
      key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
   }

   /* _NEW_COLOR | _NEW_BUFFERS */
   /* Pre-Gen6, the hardware alpha test always used each render target's
    * alpha to do the alpha test, rather than render target 0's alpha as GL
    * requires.  Fix that by building the alpha test into the shader and
    * skipping the fixed-function alpha test.
    */
   if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
       ctx->Color.AlphaEnabled) {
      key->alpha_test_func = ctx->Color.AlphaFunc;
      key->alpha_test_ref = ctx->Color.AlphaRef;
   }

   /* The unique fragment program ID */
   key->program_string_id = fp->id;

   /* Whether reads from the framebuffer should behave coherently. */
   key->coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch;
}

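/* Called from the state upload code: if the relevant state is dirty,
 * rebuild the key and either find the matching compiled program in the
 * cache or compile a new one.
 */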
void
brw_upload_wm_prog(struct brw_context *brw)
{
   struct brw_wm_prog_key key;
   struct brw_program *fp = (struct brw_program *) brw->fragment_program;

   if (!brw_wm_state_dirty(brw))
      return;

   brw_wm_populate_key(brw, &key);

   if (!brw_search_cache(&brw->cache, BRW_CACHE_FS_PROG,
                         &key, sizeof(key),
                         &brw->wm.base.prog_offset,
                         &brw->wm.base.prog_data)) {
      bool success = brw_codegen_wm_prog(brw, fp, &key,
                                         &brw->vue_map_geom_out);
      (void) success;
      assert(success);
   }
}

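/* Precompile the fragment shader at link time with a guessed key, so that
 * a draw-time recompile is only needed when the real state differs from
 * these guesses.
 */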
bool
brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog)
{
   struct brw_context *brw = brw_context(ctx);
   struct brw_wm_prog_key key;

   struct brw_program *bfp = brw_program(prog);

   memset(&key, 0, sizeof(key));

   uint64_t outputs_written = prog->info.outputs_written;

   if (brw->gen < 6) {
      if (prog->info.fs.uses_discard)
         key.iz_lookup |= IZ_PS_KILL_ALPHATEST_BIT;

      if (outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
         key.iz_lookup |= IZ_PS_COMPUTES_DEPTH_BIT;

      /* Just assume depth testing. */
      key.iz_lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
      key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
   }

   if (brw->gen < 6 || _mesa_bitcount_64(prog->info.inputs_read &
                                         BRW_FS_VARYING_INPUT_MASK) > 16) {
      key.input_slots_valid = prog->info.inputs_read | VARYING_BIT_POS;
   }

   brw_setup_tex_for_precompile(brw, &key.tex, prog);

   key.nr_color_regions = _mesa_bitcount_64(outputs_written &
         ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
           BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
           BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));

   key.program_string_id = bfp->id;

   /* Whether reads from the framebuffer should behave coherently. */
   key.coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch;

   uint32_t old_prog_offset = brw->wm.base.prog_offset;
   struct brw_stage_prog_data *old_prog_data = brw->wm.base.prog_data;

   struct brw_vue_map vue_map;
   if (brw->gen < 6) {
      brw_compute_vue_map(&brw->screen->devinfo, &vue_map,
                          prog->info.inputs_read | VARYING_BIT_POS,
                          false);
   }

   bool success = brw_codegen_wm_prog(brw, bfp, &key, &vue_map);

   brw->wm.base.prog_offset = old_prog_offset;
   brw->wm.base.prog_data = old_prog_data;

   return success;
}
    657