Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright (c) 2011 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include <stdlib.h>
     25 #include <math.h>
     26 
     27 #include "util/macros.h"
     28 #include "main/macros.h"
     29 
     30 #include "gen_l3_config.h"
     31 
     32 /**
     33  * The following diagram shows how we partition the URB:
     34  *
     35  *        16kb or 32kb               Rest of the URB space
     36  *   __________-__________   _________________-_________________
     37  *  /                     \ /                                   \
     38  * +-------------------------------------------------------------+
     39  * |  VS/HS/DS/GS/FS Push  |           VS/HS/DS/GS URB           |
     40  * |       Constants       |               Entries               |
     41  * +-------------------------------------------------------------+
     42  *
     43  * Push constants must be stored at the beginning of the URB space,
     44  * while URB entries can be stored anywhere.  We choose to lay them
     45  * out in pipeline order (VS -> HS -> DS -> GS).
     46  */
     47 
     48 /**
     49  * Decide how to partition the URB among the various stages.
     50  *
     51  * \param[in] push_constant_bytes - space allocate for push constants.
     52  * \param[in] urb_size_bytes - total size of the URB (from L3 config).
     53  * \param[in] tess_present - are tessellation shaders active?
     54  * \param[in] gs_present - are geometry shaders active?
     55  * \param[in] entry_size - the URB entry size (from the shader compiler)
     56  * \param[out] entries - the number of URB entries for each stage
     57  * \param[out] start - the starting offset for each stage
     58  */
     59 void
     60 gen_get_urb_config(const struct gen_device_info *devinfo,
     61                    unsigned push_constant_bytes, unsigned urb_size_bytes,
     62                    bool tess_present, bool gs_present,
     63                    const unsigned entry_size[4],
     64                    unsigned entries[4], unsigned start[4])
     65 {
     66    const bool active[4] = { true, tess_present, tess_present, gs_present };
     67 
     68    /* URB allocations must be done in 8k chunks. */
     69    const unsigned chunk_size_bytes = 8192;
     70 
     71    const unsigned push_constant_chunks =
     72       push_constant_bytes / chunk_size_bytes;
     73    const unsigned urb_chunks = urb_size_bytes / chunk_size_bytes;
     74 
     75    /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
     76     *
     77     *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
     78     *     Allocation Size is less than 9 512-bit URB entries.
     79     *
     80     * Similar text exists for HS, DS and GS.
     81     */
     82    unsigned granularity[4];
     83    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
     84       granularity[i] = (entry_size[i] < 9) ? 8 : 1;
     85    }
     86 
     87    unsigned min_entries[4] = {
     88       /* VS has a lower limit on the number of URB entries.
     89        *
     90        * From the Broadwell PRM, 3DSTATE_URB_VS instruction:
     91        * "When tessellation is enabled, the VS Number of URB Entries must be
     92        *  greater than or equal to 192."
     93        */
     94       [MESA_SHADER_VERTEX] = tess_present && devinfo->gen == 8 ?
     95          192 : devinfo->urb.min_entries[MESA_SHADER_VERTEX],
     96 
     97       /* There are two constraints on the minimum amount of URB space we can
     98        * allocate:
     99        *
    100        * (1) We need room for at least 2 URB entries, since we always operate
    101        * the GS in DUAL_OBJECT mode.
    102        *
    103        * (2) We can't allocate less than nr_gs_entries_granularity.
    104        */
    105       [MESA_SHADER_GEOMETRY] = gs_present ? 2 : 0,
    106 
    107       [MESA_SHADER_TESS_CTRL] = tess_present ? 1 : 0,
    108 
    109       [MESA_SHADER_TESS_EVAL] = tess_present ?
    110          devinfo->urb.min_entries[MESA_SHADER_TESS_EVAL] : 0,
    111    };
    112 
    113    /* Min VS Entries isn't a multiple of 8 on Cherryview/Broxton; round up.
    114     * Round them all up.
    115     */
    116    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
    117       min_entries[i] = ALIGN(min_entries[i], granularity[i]);
    118    }
    119 
    120    unsigned entry_size_bytes[4];
    121    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
    122       entry_size_bytes[i] = 64 * entry_size[i];
    123    }
    124 
    125    /* Initially, assign each stage the minimum amount of URB space it needs,
    126     * and make a note of how much additional space it "wants" (the amount of
    127     * additional space it could actually make use of).
    128     */
    129    unsigned chunks[4];
    130    unsigned wants[4];
    131    unsigned total_needs = push_constant_chunks;
    132    unsigned total_wants = 0;
    133 
    134    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
    135       if (active[i]) {
    136          chunks[i] = DIV_ROUND_UP(min_entries[i] * entry_size_bytes[i],
    137                                   chunk_size_bytes);
    138 
    139          wants[i] =
    140             DIV_ROUND_UP(devinfo->urb.max_entries[i] * entry_size_bytes[i],
    141                          chunk_size_bytes) - chunks[i];
    142       } else {
    143          chunks[i] = 0;
    144          wants[i] = 0;
    145       }
    146 
    147       total_needs += chunks[i];
    148       total_wants += wants[i];
    149    }
    150 
    151    assert(total_needs <= urb_chunks);
    152 
    153    /* Mete out remaining space (if any) in proportion to "wants". */
    154    unsigned remaining_space = MIN2(urb_chunks - total_needs, total_wants);
    155 
    156    if (remaining_space > 0) {
    157       for (int i = MESA_SHADER_VERTEX;
    158            total_wants > 0 && i <= MESA_SHADER_TESS_EVAL; i++) {
    159          unsigned additional = (unsigned)
    160             roundf(wants[i] * (((float) remaining_space) / total_wants));
    161          chunks[i] += additional;
    162          remaining_space -= additional;
    163          total_wants -= wants[i];
    164       }
    165 
    166       chunks[MESA_SHADER_GEOMETRY] += remaining_space;
    167    }
    168 
    169    /* Sanity check that we haven't over-allocated. */
    170    unsigned total_chunks = push_constant_chunks;
    171    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
    172       total_chunks += chunks[i];
    173    }
    174    assert(total_chunks <= urb_chunks);
    175 
    176    /* Finally, compute the number of entries that can fit in the space
    177     * allocated to each stage.
    178     */
    179    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
    180       entries[i] = chunks[i] * chunk_size_bytes / entry_size_bytes[i];
    181 
    182       /* Since we rounded up when computing wants[], this may be slightly
    183        * more than the maximum allowed amount, so correct for that.
    184        */
    185       entries[i] = MIN2(entries[i], devinfo->urb.max_entries[i]);
    186 
    187       /* Ensure that we program a multiple of the granularity. */
    188       entries[i] = ROUND_DOWN_TO(entries[i], granularity[i]);
    189 
    190       /* Finally, sanity check to make sure we have at least the minimum
    191        * number of entries needed for each stage.
    192        */
    193       assert(entries[i] >= min_entries[i]);
    194    }
    195 
    196    /* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */
    197    start[0] = push_constant_chunks;
    198    for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) {
    199       start[i] = start[i - 1] + chunks[i - 1];
    200    }
    201 }
    202