Home | History | Annotate | Download | only in i965
      1 /*
      2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
      3  Intel funded Tungsten Graphics to
      4  develop this 3D driver.
      5 
      6  Permission is hereby granted, free of charge, to any person obtaining
      7  a copy of this software and associated documentation files (the
      8  "Software"), to deal in the Software without restriction, including
      9  without limitation the rights to use, copy, modify, merge, publish,
     10  distribute, sublicense, and/or sell copies of the Software, and to
     11  permit persons to whom the Software is furnished to do so, subject to
     12  the following conditions:
     13 
     14  The above copyright notice and this permission notice (including the
     15  next paragraph) shall be included in all copies or substantial
     16  portions of the Software.
     17 
     18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
     22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
     23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
     24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25 
     26  **********************************************************************/
     27  /*
     28   * Authors:
     29   *   Keith Whitwell <keithw (at) vmware.com>
     30   */
     31 
     32 
     33 #include "main/compiler.h"
     34 #include "main/context.h"
     35 #include "brw_context.h"
     36 #include "brw_vs.h"
     37 #include "brw_util.h"
     38 #include "brw_state.h"
     39 #include "program/prog_print.h"
     40 #include "program/prog_parameter.h"
     41 #include "brw_nir.h"
     42 #include "brw_program.h"
     43 
     44 #include "util/ralloc.h"
     45 
     46 GLbitfield64
     47 brw_vs_outputs_written(struct brw_context *brw, struct brw_vs_prog_key *key,
     48                        GLbitfield64 user_varyings)
     49 {
     50    GLbitfield64 outputs_written = user_varyings;
     51 
     52    if (key->copy_edgeflag) {
     53       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
     54    }
     55 
     56    if (brw->gen < 6) {
     57       /* Put dummy slots into the VUE for the SF to put the replaced
     58        * point sprite coords in.  We shouldn't need these dummy slots,
     59        * which take up precious URB space, but it would mean that the SF
     60        * doesn't get nice aligned pairs of input coords into output
     61        * coords, which would be a pain to handle.
     62        */
     63       for (unsigned i = 0; i < 8; i++) {
     64          if (key->point_coord_replace & (1 << i))
     65             outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
     66       }
     67 
     68       /* if back colors are written, allocate slots for front colors too */
     69       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
     70          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
     71       if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
     72          outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
     73    }
     74 
     75    /* In order for legacy clipping to work, we need to populate the clip
     76     * distance varying slots whenever clipping is enabled, even if the vertex
     77     * shader doesn't write to gl_ClipDistance.
     78     */
     79    if (key->nr_userclip_plane_consts > 0) {
     80       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
     81       outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
     82    }
     83 
     84    return outputs_written;
     85 }
     86 
     87 static void
     88 brw_vs_debug_recompile(struct brw_context *brw, struct gl_program *prog,
     89                        const struct brw_vs_prog_key *key)
     90 {
     91    perf_debug("Recompiling vertex shader for program %d\n", prog->Id);
     92 
     93    bool found = false;
     94    const struct brw_vs_prog_key *old_key =
     95       brw_find_previous_compile(&brw->cache, BRW_CACHE_VS_PROG,
     96                                 key->program_string_id);
     97 
     98    if (!old_key) {
     99       perf_debug("  Didn't find previous compile in the shader cache for "
    100                  "debug\n");
    101       return;
    102    }
    103 
    104    for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
    105       found |= key_debug(brw, "Vertex attrib w/a flags",
    106                          old_key->gl_attrib_wa_flags[i],
    107                          key->gl_attrib_wa_flags[i]);
    108    }
    109 
    110    found |= key_debug(brw, "legacy user clipping",
    111                       old_key->nr_userclip_plane_consts,
    112                       key->nr_userclip_plane_consts);
    113 
    114    found |= key_debug(brw, "copy edgeflag",
    115                       old_key->copy_edgeflag, key->copy_edgeflag);
    116    found |= key_debug(brw, "PointCoord replace",
    117                       old_key->point_coord_replace, key->point_coord_replace);
    118    found |= key_debug(brw, "vertex color clamping",
    119                       old_key->clamp_vertex_color, key->clamp_vertex_color);
    120 
    121    found |= brw_debug_recompile_sampler_key(brw, &old_key->tex, &key->tex);
    122 
    123    if (!found) {
    124       perf_debug("  Something else\n");
    125    }
    126 }
    127 
    128 static bool
    129 brw_codegen_vs_prog(struct brw_context *brw,
    130                     struct brw_program *vp,
    131                     struct brw_vs_prog_key *key)
    132 {
    133    const struct brw_compiler *compiler = brw->screen->compiler;
    134    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    135    GLuint program_size;
    136    const GLuint *program;
    137    struct brw_vs_prog_data prog_data;
    138    struct brw_stage_prog_data *stage_prog_data = &prog_data.base.base;
    139    void *mem_ctx;
    140    bool start_busy = false;
    141    double start_time = 0;
    142 
    143    memset(&prog_data, 0, sizeof(prog_data));
    144 
    145    /* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
    146    if (vp->program.is_arb_asm)
    147       stage_prog_data->use_alt_mode = true;
    148 
    149    mem_ctx = ralloc_context(NULL);
    150 
    151    brw_assign_common_binding_table_offsets(devinfo, &vp->program,
    152                                            &prog_data.base.base, 0);
    153 
    154    /* Allocate the references to the uniforms that will end up in the
    155     * prog_data associated with the compiled program, and which will be freed
    156     * by the state cache.
    157     */
    158    int param_count = vp->program.nir->num_uniforms / 4;
    159 
    160    prog_data.base.base.nr_image_params = vp->program.info.num_images;
    161 
    162    /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
    163     * planes as uniforms.
    164     */
    165    param_count += key->nr_userclip_plane_consts * 4;
    166 
    167    stage_prog_data->param =
    168       rzalloc_array(NULL, const gl_constant_value *, param_count);
    169    stage_prog_data->pull_param =
    170       rzalloc_array(NULL, const gl_constant_value *, param_count);
    171    stage_prog_data->image_param =
    172       rzalloc_array(NULL, struct brw_image_param,
    173                     stage_prog_data->nr_image_params);
    174    stage_prog_data->nr_params = param_count;
    175 
    176    if (!vp->program.is_arb_asm) {
    177       brw_nir_setup_glsl_uniforms(vp->program.nir, &vp->program,
    178                                   &prog_data.base.base,
    179                                   compiler->scalar_stage[MESA_SHADER_VERTEX]);
    180    } else {
    181       brw_nir_setup_arb_uniforms(vp->program.nir, &vp->program,
    182                                  &prog_data.base.base);
    183    }
    184 
    185    uint64_t outputs_written =
    186       brw_vs_outputs_written(brw, key, vp->program.info.outputs_written);
    187    prog_data.inputs_read = vp->program.info.inputs_read;
    188    prog_data.double_inputs_read = vp->program.info.double_inputs_read;
    189 
    190    if (key->copy_edgeflag) {
    191       prog_data.inputs_read |= VERT_BIT_EDGEFLAG;
    192    }
    193 
    194    brw_compute_vue_map(devinfo,
    195                        &prog_data.base.vue_map, outputs_written,
    196                        vp->program.nir->info->separate_shader);
    197 
    198    if (0) {
    199       _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true);
    200    }
    201 
    202    if (unlikely(brw->perf_debug)) {
    203       start_busy = (brw->batch.last_bo &&
    204                     drm_intel_bo_busy(brw->batch.last_bo));
    205       start_time = get_time();
    206    }
    207 
    208    if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
    209       if (vp->program.is_arb_asm)
    210          brw_dump_arb_asm("vertex", &vp->program);
    211    }
    212 
    213    int st_index = -1;
    214    if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
    215       st_index = brw_get_shader_time_index(brw, &vp->program, ST_VS,
    216                                            !vp->program.is_arb_asm);
    217    }
    218 
    219    /* Emit GEN4 code.
    220     */
    221    char *error_str;
    222    program = brw_compile_vs(compiler, brw, mem_ctx, key, &prog_data,
    223                             vp->program.nir,
    224                             brw_select_clip_planes(&brw->ctx),
    225                             !_mesa_is_gles3(&brw->ctx),
    226                             st_index, &program_size, &error_str);
    227    if (program == NULL) {
    228       if (!vp->program.is_arb_asm) {
    229          vp->program.sh.data->LinkStatus = false;
    230          ralloc_strcat(&vp->program.sh.data->InfoLog, error_str);
    231       }
    232 
    233       _mesa_problem(NULL, "Failed to compile vertex shader: %s\n", error_str);
    234 
    235       ralloc_free(mem_ctx);
    236       return false;
    237    }
    238 
    239    if (unlikely(brw->perf_debug)) {
    240       if (vp->compiled_once) {
    241          brw_vs_debug_recompile(brw, &vp->program, key);
    242       }
    243       if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
    244          perf_debug("VS compile took %.03f ms and stalled the GPU\n",
    245                     (get_time() - start_time) * 1000);
    246       }
    247       vp->compiled_once = true;
    248    }
    249 
    250    /* Scratch space is used for register spilling */
    251    brw_alloc_stage_scratch(brw, &brw->vs.base,
    252                            prog_data.base.base.total_scratch,
    253                            devinfo->max_vs_threads);
    254 
    255    brw_upload_cache(&brw->cache, BRW_CACHE_VS_PROG,
    256 		    key, sizeof(struct brw_vs_prog_key),
    257 		    program, program_size,
    258 		    &prog_data, sizeof(prog_data),
    259 		    &brw->vs.base.prog_offset, &brw->vs.base.prog_data);
    260    ralloc_free(mem_ctx);
    261 
    262    return true;
    263 }
    264 
    265 static bool
    266 brw_vs_state_dirty(const struct brw_context *brw)
    267 {
    268    return brw_state_dirty(brw,
    269                           _NEW_BUFFERS |
    270                           _NEW_LIGHT |
    271                           _NEW_POINT |
    272                           _NEW_POLYGON |
    273                           _NEW_TEXTURE |
    274                           _NEW_TRANSFORM,
    275                           BRW_NEW_VERTEX_PROGRAM |
    276                           BRW_NEW_VS_ATTRIB_WORKAROUNDS);
    277 }
    278 
    279 void
    280 brw_vs_populate_key(struct brw_context *brw,
    281                     struct brw_vs_prog_key *key)
    282 {
    283    struct gl_context *ctx = &brw->ctx;
    284    /* BRW_NEW_VERTEX_PROGRAM */
    285    struct brw_program *vp = (struct brw_program *)brw->vertex_program;
    286    struct gl_program *prog = (struct gl_program *) brw->vertex_program;
    287 
    288    memset(key, 0, sizeof(*key));
    289 
    290    /* Just upload the program verbatim for now.  Always send it all
    291     * the inputs it asks for, whether they are varying or not.
    292     */
    293    key->program_string_id = vp->id;
    294 
    295    if (ctx->Transform.ClipPlanesEnabled != 0 &&
    296        (ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGLES) &&
    297        vp->program.ClipDistanceArraySize == 0) {
    298       key->nr_userclip_plane_consts =
    299          _mesa_logbase2(ctx->Transform.ClipPlanesEnabled) + 1;
    300    }
    301 
    302    if (brw->gen < 6) {
    303       /* _NEW_POLYGON */
    304       key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
    305                             ctx->Polygon.BackMode != GL_FILL);
    306 
    307       /* _NEW_POINT */
    308       if (ctx->Point.PointSprite) {
    309          key->point_coord_replace = ctx->Point.CoordReplace & 0xff;
    310       }
    311    }
    312 
    313    if (prog->nir->info->outputs_written &
    314        (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
    315         VARYING_BIT_BFC1)) {
    316       /* _NEW_LIGHT | _NEW_BUFFERS */
    317       key->clamp_vertex_color = ctx->Light._ClampVertexColor;
    318    }
    319 
    320    /* _NEW_TEXTURE */
    321    brw_populate_sampler_prog_key_data(ctx, prog, &key->tex);
    322 
    323    /* BRW_NEW_VS_ATTRIB_WORKAROUNDS */
    324    if (brw->gen < 8 && !brw->is_haswell) {
    325       memcpy(key->gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
    326              sizeof(brw->vb.attrib_wa_flags));
    327    }
    328 }
    329 
    330 void
    331 brw_upload_vs_prog(struct brw_context *brw)
    332 {
    333    struct brw_vs_prog_key key;
    334    /* BRW_NEW_VERTEX_PROGRAM */
    335    struct brw_program *vp = (struct brw_program *)brw->vertex_program;
    336 
    337    if (!brw_vs_state_dirty(brw))
    338       return;
    339 
    340    brw_vs_populate_key(brw, &key);
    341 
    342    if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG,
    343 			 &key, sizeof(key),
    344 			 &brw->vs.base.prog_offset, &brw->vs.base.prog_data)) {
    345       bool success = brw_codegen_vs_prog(brw, vp, &key);
    346       (void) success;
    347       assert(success);
    348    }
    349 }
    350 
    351 bool
    352 brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog)
    353 {
    354    struct brw_context *brw = brw_context(ctx);
    355    struct brw_vs_prog_key key;
    356    uint32_t old_prog_offset = brw->vs.base.prog_offset;
    357    struct brw_stage_prog_data *old_prog_data = brw->vs.base.prog_data;
    358    bool success;
    359 
    360    struct brw_program *bvp = brw_program(prog);
    361 
    362    memset(&key, 0, sizeof(key));
    363 
    364    brw_setup_tex_for_precompile(brw, &key.tex, prog);
    365    key.program_string_id = bvp->id;
    366    key.clamp_vertex_color =
    367       (prog->nir->info->outputs_written &
    368        (VARYING_BIT_COL0 | VARYING_BIT_COL1 | VARYING_BIT_BFC0 |
    369         VARYING_BIT_BFC1));
    370 
    371    success = brw_codegen_vs_prog(brw, bvp, &key);
    372 
    373    brw->vs.base.prog_offset = old_prog_offset;
    374    brw->vs.base.prog_data = old_prog_data;
    375 
    376    return success;
    377 }
    378