Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2011 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "main/macros.h"
     25 #include "intel_batchbuffer.h"
     26 #include "brw_context.h"
     27 #include "brw_state.h"
     28 #include "brw_defines.h"
     29 
     30 #include "common/gen_l3_config.h"
     31 
     32 /**
     33  * The following diagram shows how we partition the URB:
     34  *
     35  *        16kB or 32kB               Rest of the URB space
     36  *   __________-__________   _________________-_________________
     37  *  /                     \ /                                   \
     38  * +-------------------------------------------------------------+
     39  * |  VS/HS/DS/GS/FS Push  |           VS/HS/DS/GS URB           |
     40  * |       Constants       |               Entries               |
     41  * +-------------------------------------------------------------+
     42  *
     43  * Notably, push constants must be stored at the beginning of the URB
     44  * space, while entries can be stored anywhere.  Ivybridge and Haswell
     45  * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3
     46  * doubles this (32kB).
     47  *
     48  * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and
     49  * sized) in increments of 1kB.  Haswell GT3 requires them to be located and
     50  * sized in increments of 2kB.
     51  *
     52  * Currently we split the constant buffer space evenly among whatever stages
     53  * are active.  This is probably not ideal, but simple.
     54  *
     55  * Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
     56  * Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
     57  * Haswell GT3 has 512kB of URB space.
     58  *
     59  * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
     60  * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
     61  */
     62 static void
     63 gen7_allocate_push_constants(struct brw_context *brw)
     64 {
     65    /* BRW_NEW_GEOMETRY_PROGRAM */
     66    bool gs_present = brw->geometry_program;
     67 
     68    /* BRW_NEW_TESS_PROGRAMS */
     69    bool tess_present = brw->tess_eval_program;
     70 
     71    unsigned avail_size = 16;
     72    unsigned multiplier =
     73       (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 2 : 1;
     74 
     75    int stages = 2 + gs_present + 2 * tess_present;
     76 
     77    /* Divide up the available space equally between stages.  Because we
     78     * round down (using floor division), there may be some left over
     79     * space.  We allocate that to the pixel shader stage.
     80     */
     81    unsigned size_per_stage = avail_size / stages;
     82 
     83    unsigned vs_size = size_per_stage;
     84    unsigned hs_size = tess_present ? size_per_stage : 0;
     85    unsigned ds_size = tess_present ? size_per_stage : 0;
     86    unsigned gs_size = gs_present ? size_per_stage : 0;
     87    unsigned fs_size = avail_size - size_per_stage * (stages - 1);
     88 
     89    gen7_emit_push_constant_state(brw, multiplier * vs_size,
     90                                  multiplier * hs_size, multiplier * ds_size,
     91                                  multiplier * gs_size, multiplier * fs_size);
     92 
     93    /* From p115 of the Ivy Bridge PRM (3.2.1.4 3DSTATE_PUSH_CONSTANT_ALLOC_VS):
     94     *
     95     *     Programming Restriction:
     96     *
     97     *     The 3DSTATE_CONSTANT_VS must be reprogrammed prior to the next
     98     *     3DPRIMITIVE command after programming the
     99     *     3DSTATE_PUSH_CONSTANT_ALLOC_VS.
    100     *
    101     * Similar text exists for the other 3DSTATE_PUSH_CONSTANT_ALLOC_*
    102     * commands.
    103     */
    104    brw->ctx.NewDriverState |= BRW_NEW_PUSH_CONSTANT_ALLOCATION;
    105 }
    106 
    107 void
    108 gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
    109                               unsigned hs_size, unsigned ds_size,
    110                               unsigned gs_size, unsigned fs_size)
    111 {
    112    unsigned offset = 0;
    113 
    114    BEGIN_BATCH(10);
    115    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
    116    OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    117    offset += vs_size;
    118 
    119    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_HS << 16 | (2 - 2));
    120    OUT_BATCH(hs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    121    offset += hs_size;
    122 
    123    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_DS << 16 | (2 - 2));
    124    OUT_BATCH(ds_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    125    offset += ds_size;
    126 
    127    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_GS << 16 | (2 - 2));
    128    OUT_BATCH(gs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    129    offset += gs_size;
    130 
    131    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
    132    OUT_BATCH(fs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    133    ADVANCE_BATCH();
    134 
    135    /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
    136     *
    137     *     A PIPE_CONTROL command with the CS Stall bit set must be programmed
    138     *     in the ring after this instruction.
    139     *
    140     * No such restriction exists for Haswell or Baytrail.
    141     */
    142    if (brw->gen < 8 && !brw->is_haswell && !brw->is_baytrail)
    143       gen7_emit_cs_stall_flush(brw);
    144 }
    145 
    146 const struct brw_tracked_state gen7_push_constant_space = {
    147    .dirty = {
    148       .mesa = 0,
    149       .brw = BRW_NEW_CONTEXT |
    150              BRW_NEW_GEOMETRY_PROGRAM |
    151              BRW_NEW_TESS_PROGRAMS,
    152    },
    153    .emit = gen7_allocate_push_constants,
    154 };
    155 
    156 static void
    157 upload_urb(struct brw_context *brw)
    158 {
    159    /* BRW_NEW_VS_PROG_DATA */
    160    const struct brw_vue_prog_data *vs_vue_prog_data =
    161       brw_vue_prog_data(brw->vs.base.prog_data);
    162    const unsigned vs_size = MAX2(vs_vue_prog_data->urb_entry_size, 1);
    163    /* BRW_NEW_GS_PROG_DATA */
    164    const bool gs_present = brw->gs.base.prog_data;
    165    /* BRW_NEW_TES_PROG_DATA */
    166    const bool tess_present = brw->tes.base.prog_data;
    167 
    168    gen7_upload_urb(brw, vs_size, gs_present, tess_present);
    169 }
    170 
    171 void
    172 gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
    173                 bool gs_present, bool tess_present)
    174 {
    175    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    176    const int push_size_kB =
    177       (brw->gen >= 8 || (brw->is_haswell && brw->gt == 3)) ? 32 : 16;
    178 
    179    /* BRW_NEW_{VS,TCS,TES,GS}_PROG_DATA */
    180    struct brw_vue_prog_data *prog_data[4] = {
    181       [MESA_SHADER_VERTEX] =
    182          brw_vue_prog_data(brw->vs.base.prog_data),
    183       [MESA_SHADER_TESS_CTRL] =
    184          tess_present ? brw_vue_prog_data(brw->tcs.base.prog_data) : NULL,
    185       [MESA_SHADER_TESS_EVAL] =
    186          tess_present ? brw_vue_prog_data(brw->tes.base.prog_data) : NULL,
    187       [MESA_SHADER_GEOMETRY] =
    188          gs_present ? brw_vue_prog_data(brw->gs.base.prog_data) : NULL,
    189    };
    190 
    191    unsigned entry_size[4];
    192    entry_size[MESA_SHADER_VERTEX] = vs_size;
    193    for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) {
    194       entry_size[i] = prog_data[i] ? prog_data[i]->urb_entry_size : 1;
    195    }
    196 
    197    /* If we're just switching between programs with the same URB requirements,
    198     * skip the rest of the logic.
    199     */
    200    if (!(brw->ctx.NewDriverState & BRW_NEW_CONTEXT) &&
    201        !(brw->ctx.NewDriverState & BRW_NEW_URB_SIZE) &&
    202        brw->urb.vsize == entry_size[MESA_SHADER_VERTEX] &&
    203        brw->urb.gs_present == gs_present &&
    204        brw->urb.gsize == entry_size[MESA_SHADER_GEOMETRY] &&
    205        brw->urb.tess_present == tess_present &&
    206        brw->urb.hsize == entry_size[MESA_SHADER_TESS_CTRL] &&
    207        brw->urb.dsize == entry_size[MESA_SHADER_TESS_EVAL]) {
    208       return;
    209    }
    210    brw->urb.vsize = entry_size[MESA_SHADER_VERTEX];
    211    brw->urb.gs_present = gs_present;
    212    brw->urb.gsize = entry_size[MESA_SHADER_GEOMETRY];
    213    brw->urb.tess_present = tess_present;
    214    brw->urb.hsize = entry_size[MESA_SHADER_TESS_CTRL];
    215    brw->urb.dsize = entry_size[MESA_SHADER_TESS_EVAL];
    216 
    217    unsigned entries[4];
    218    unsigned start[4];
    219    gen_get_urb_config(devinfo, 1024 * push_size_kB, 1024 * brw->urb.size,
    220                       tess_present, gs_present, entry_size, entries, start);
    221 
    222    if (brw->gen == 7 && !brw->is_haswell && !brw->is_baytrail)
    223       gen7_emit_vs_workaround_flush(brw);
    224 
    225    BEGIN_BATCH(8);
    226    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
    227       OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2));
    228       OUT_BATCH(entries[i] |
    229                 ((entry_size[i] - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
    230                 (start[i] << GEN7_URB_STARTING_ADDRESS_SHIFT));
    231    }
    232    ADVANCE_BATCH();
    233 }
    234 
    235 const struct brw_tracked_state gen7_urb = {
    236    .dirty = {
    237       .mesa = 0,
    238       .brw = BRW_NEW_CONTEXT |
    239              BRW_NEW_URB_SIZE |
    240              BRW_NEW_GS_PROG_DATA |
    241              BRW_NEW_TCS_PROG_DATA |
    242              BRW_NEW_TES_PROG_DATA |
    243              BRW_NEW_VS_PROG_DATA,
    244    },
    245    .emit = upload_urb,
    246 };
    247