Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2011 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "main/macros.h"
     25 #include "intel_batchbuffer.h"
     26 #include "brw_context.h"
     27 #include "brw_state.h"
     28 #include "brw_defines.h"
     29 
     30 #include "common/gen_l3_config.h"
     31 
     32 /**
     33  * The following diagram shows how we partition the URB:
     34  *
     35  *        16kB or 32kB               Rest of the URB space
     36  *   __________-__________   _________________-_________________
     37  *  /                     \ /                                   \
     38  * +-------------------------------------------------------------+
     39  * |  VS/HS/DS/GS/FS Push  |           VS/HS/DS/GS URB           |
     40  * |       Constants       |               Entries               |
     41  * +-------------------------------------------------------------+
     42  *
     43  * Notably, push constants must be stored at the beginning of the URB
     44  * space, while entries can be stored anywhere.  Ivybridge and Haswell
     45  * GT1/GT2 have a maximum constant buffer size of 16kB, while Haswell GT3
     46  * doubles this (32kB).
     47  *
     48  * Ivybridge and Haswell GT1/GT2 allow push constants to be located (and
     49  * sized) in increments of 1kB.  Haswell GT3 requires them to be located and
     50  * sized in increments of 2kB.
     51  *
     52  * Currently we split the constant buffer space evenly among whatever stages
     53  * are active.  This is probably not ideal, but simple.
     54  *
     55  * Ivybridge GT1 and Haswell GT1 have 128kB of URB space.
     56  * Ivybridge GT2 and Haswell GT2 have 256kB of URB space.
     57  * Haswell GT3 has 512kB of URB space.
     58  *
     59  * See "Volume 2a: 3D Pipeline," section 1.8, "Volume 1b: Configurations",
     60  * and the documentation for 3DSTATE_PUSH_CONSTANT_ALLOC_xS.
     61  */
     62 static void
     63 gen7_allocate_push_constants(struct brw_context *brw)
     64 {
     65    const struct gen_device_info *devinfo = &brw->screen->devinfo;
     66 
     67    /* BRW_NEW_GEOMETRY_PROGRAM */
     68    bool gs_present = brw->programs[MESA_SHADER_GEOMETRY];
     69 
     70    /* BRW_NEW_TESS_PROGRAMS */
     71    bool tess_present = brw->programs[MESA_SHADER_TESS_EVAL];
     72 
     73    unsigned avail_size = 16;
     74    unsigned multiplier =
     75       (devinfo->gen >= 8 || (devinfo->is_haswell && devinfo->gt == 3)) ? 2 : 1;
     76 
     77    int stages = 2 + gs_present + 2 * tess_present;
     78 
     79    /* Divide up the available space equally between stages.  Because we
     80     * round down (using floor division), there may be some left over
     81     * space.  We allocate that to the pixel shader stage.
     82     */
     83    unsigned size_per_stage = avail_size / stages;
     84 
     85    unsigned vs_size = size_per_stage;
     86    unsigned hs_size = tess_present ? size_per_stage : 0;
     87    unsigned ds_size = tess_present ? size_per_stage : 0;
     88    unsigned gs_size = gs_present ? size_per_stage : 0;
     89    unsigned fs_size = avail_size - size_per_stage * (stages - 1);
     90 
     91    gen7_emit_push_constant_state(brw, multiplier * vs_size,
     92                                  multiplier * hs_size, multiplier * ds_size,
     93                                  multiplier * gs_size, multiplier * fs_size);
     94 
     95    /* From p115 of the Ivy Bridge PRM (3.2.1.4 3DSTATE_PUSH_CONSTANT_ALLOC_VS):
     96     *
     97     *     Programming Restriction:
     98     *
     99     *     The 3DSTATE_CONSTANT_VS must be reprogrammed prior to the next
    100     *     3DPRIMITIVE command after programming the
    101     *     3DSTATE_PUSH_CONSTANT_ALLOC_VS.
    102     *
    103     * Similar text exists for the other 3DSTATE_PUSH_CONSTANT_ALLOC_*
    104     * commands.
    105     */
    106    brw->vs.base.push_constants_dirty = true;
    107    brw->tcs.base.push_constants_dirty = true;
    108    brw->tes.base.push_constants_dirty = true;
    109    brw->gs.base.push_constants_dirty = true;
    110    brw->wm.base.push_constants_dirty = true;
    111 }
    112 
    113 void
    114 gen7_emit_push_constant_state(struct brw_context *brw, unsigned vs_size,
    115                               unsigned hs_size, unsigned ds_size,
    116                               unsigned gs_size, unsigned fs_size)
    117 {
    118    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    119    unsigned offset = 0;
    120 
    121    BEGIN_BATCH(10);
    122    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_VS << 16 | (2 - 2));
    123    OUT_BATCH(vs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    124    offset += vs_size;
    125 
    126    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_HS << 16 | (2 - 2));
    127    OUT_BATCH(hs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    128    offset += hs_size;
    129 
    130    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_DS << 16 | (2 - 2));
    131    OUT_BATCH(ds_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    132    offset += ds_size;
    133 
    134    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_GS << 16 | (2 - 2));
    135    OUT_BATCH(gs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    136    offset += gs_size;
    137 
    138    OUT_BATCH(_3DSTATE_PUSH_CONSTANT_ALLOC_PS << 16 | (2 - 2));
    139    OUT_BATCH(fs_size | offset << GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT);
    140    ADVANCE_BATCH();
    141 
    142    /* From p292 of the Ivy Bridge PRM (11.2.4 3DSTATE_PUSH_CONSTANT_ALLOC_PS):
    143     *
    144     *     A PIPE_CONTROL command with the CS Stall bit set must be programmed
    145     *     in the ring after this instruction.
    146     *
    147     * No such restriction exists for Haswell or Baytrail.
    148     */
    149    if (devinfo->gen < 8 && !devinfo->is_haswell && !devinfo->is_baytrail)
    150       gen7_emit_cs_stall_flush(brw);
    151 }
    152 
    153 const struct brw_tracked_state gen7_push_constant_space = {
    154    .dirty = {
    155       .mesa = 0,
    156       .brw = BRW_NEW_CONTEXT |
    157              BRW_NEW_GEOMETRY_PROGRAM |
    158              BRW_NEW_TESS_PROGRAMS,
    159    },
    160    .emit = gen7_allocate_push_constants,
    161 };
    162 
    163 static void
    164 upload_urb(struct brw_context *brw)
    165 {
    166    /* BRW_NEW_VS_PROG_DATA */
    167    const struct brw_vue_prog_data *vs_vue_prog_data =
    168       brw_vue_prog_data(brw->vs.base.prog_data);
    169    const unsigned vs_size = MAX2(vs_vue_prog_data->urb_entry_size, 1);
    170    /* BRW_NEW_GS_PROG_DATA */
    171    const bool gs_present = brw->gs.base.prog_data;
    172    /* BRW_NEW_TES_PROG_DATA */
    173    const bool tess_present = brw->tes.base.prog_data;
    174 
    175    gen7_upload_urb(brw, vs_size, gs_present, tess_present);
    176 }
    177 
    178 void
    179 gen7_upload_urb(struct brw_context *brw, unsigned vs_size,
    180                 bool gs_present, bool tess_present)
    181 {
    182    const struct gen_device_info *devinfo = &brw->screen->devinfo;
    183    const int push_size_kB =
    184       (devinfo->gen >= 8 || (devinfo->is_haswell && devinfo->gt == 3)) ? 32 : 16;
    185 
    186    /* BRW_NEW_{VS,TCS,TES,GS}_PROG_DATA */
    187    struct brw_vue_prog_data *prog_data[4] = {
    188       [MESA_SHADER_VERTEX] =
    189          brw_vue_prog_data(brw->vs.base.prog_data),
    190       [MESA_SHADER_TESS_CTRL] =
    191          tess_present ? brw_vue_prog_data(brw->tcs.base.prog_data) : NULL,
    192       [MESA_SHADER_TESS_EVAL] =
    193          tess_present ? brw_vue_prog_data(brw->tes.base.prog_data) : NULL,
    194       [MESA_SHADER_GEOMETRY] =
    195          gs_present ? brw_vue_prog_data(brw->gs.base.prog_data) : NULL,
    196    };
    197 
    198    unsigned entry_size[4];
    199    entry_size[MESA_SHADER_VERTEX] = vs_size;
    200    for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) {
    201       entry_size[i] = prog_data[i] ? prog_data[i]->urb_entry_size : 1;
    202    }
    203 
    204    /* If we're just switching between programs with the same URB requirements,
    205     * skip the rest of the logic.
    206     */
    207    if (brw->urb.vsize == entry_size[MESA_SHADER_VERTEX] &&
    208        brw->urb.gs_present == gs_present &&
    209        brw->urb.gsize == entry_size[MESA_SHADER_GEOMETRY] &&
    210        brw->urb.tess_present == tess_present &&
    211        brw->urb.hsize == entry_size[MESA_SHADER_TESS_CTRL] &&
    212        brw->urb.dsize == entry_size[MESA_SHADER_TESS_EVAL]) {
    213       return;
    214    }
    215    brw->urb.vsize = entry_size[MESA_SHADER_VERTEX];
    216    brw->urb.gs_present = gs_present;
    217    brw->urb.gsize = entry_size[MESA_SHADER_GEOMETRY];
    218    brw->urb.tess_present = tess_present;
    219    brw->urb.hsize = entry_size[MESA_SHADER_TESS_CTRL];
    220    brw->urb.dsize = entry_size[MESA_SHADER_TESS_EVAL];
    221 
    222    unsigned entries[4];
    223    unsigned start[4];
    224    gen_get_urb_config(devinfo, 1024 * push_size_kB, 1024 * brw->urb.size,
    225                       tess_present, gs_present, entry_size, entries, start);
    226 
    227    if (devinfo->gen == 7 && !devinfo->is_haswell && !devinfo->is_baytrail)
    228       gen7_emit_vs_workaround_flush(brw);
    229 
    230    BEGIN_BATCH(8);
    231    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
    232       assert(devinfo->gen != 10 || entry_size[i] % 3);
    233       OUT_BATCH((_3DSTATE_URB_VS + i) << 16 | (2 - 2));
    234       OUT_BATCH(entries[i] |
    235                 ((entry_size[i] - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
    236                 (start[i] << GEN7_URB_STARTING_ADDRESS_SHIFT));
    237    }
    238    ADVANCE_BATCH();
    239 }
    240 
    241 const struct brw_tracked_state gen7_urb = {
    242    .dirty = {
    243       .mesa = 0,
    244       .brw = BRW_NEW_BLORP |
    245              BRW_NEW_CONTEXT |
    246              BRW_NEW_URB_SIZE |
    247              BRW_NEW_GS_PROG_DATA |
    248              BRW_NEW_TCS_PROG_DATA |
    249              BRW_NEW_TES_PROG_DATA |
    250              BRW_NEW_VS_PROG_DATA,
    251    },
    252    .emit = upload_urb,
    253 };
    254