Home | History | Annotate | Download | only in ilo
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 2014 LunarG, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     22  * DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors:
     25  *    Chia-I Wu <olv (at) lunarg.com>
     26  */
     27 
     28 #include "genhw/genhw.h"
     29 #include "core/ilo_builder_media.h"
     30 #include "core/ilo_builder_mi.h"
     31 #include "core/ilo_builder_render.h"
     32 
     33 #include "ilo_shader.h"
     34 #include "ilo_state.h"
     35 #include "ilo_render_gen.h"
     36 
     37 struct gen7_l3_config {
     38    int slm;
     39    int urb;
     40    int rest;
     41    int dc;
     42    int ro;
     43    int is;
     44    int c;
     45    int t;
     46 };
     47 
     48 /*
     49  * From the Ivy Bridge PRM, volume 1 part 7, page 10:
     50  *
     51  *     "Normal L3/URB mode (non-SLM mode), uses all 4 banks of L3 equally to
     52  *      distribute cycles. The following allocation is a suggested programming
     53  *      model. Note all numbers below are given in KBytes."
     54  *
     55  * From the Haswell PRM, volume 7, page 662:
     56  *
     57  *     "The configuration for {SLM = 0,URB = 224,DC = 32,RO = 256,IS = 0,C =
     58  *      0,T =0, SUM 512} was validated as a later supported configuration and
     59  *      can be utilized if desired."
     60  */
     61 static const struct gen7_l3_config gen7_l3_non_slm_configs[] = {
     62    /*       SLM   URB  Rest    DC    RO   I/S     C     T */
     63    [0] = {    0,  256,    0,    0,  256,    0,    0,    0, },
     64    [1] = {    0,  256,    0,  128,  128,    0,    0,    0, },
     65    [2] = {    0,  256,    0,   32,    0,   64,   32,  128, },
     66    [3] = {    0,  224,    0,   64,    0,   64,   32,  128, },
     67    [4] = {    0,  224,    0,  128,    0,   64,   32,   64, },
     68    [5] = {    0,  224,    0,   64,    0,  128,   32,   64, },
     69    [6] = {    0,  224,    0,    0,    0,  128,   32,  128, },
     70    [7] = {    0,  256,    0,    0,    0,  128,    0,  128, },
     71 
     72    [8] = {    0,  224,    0,   32,  256,    0,    0,    0, },
     73 };
     74 
     75 /*
     76  * From the Ivy Bridge PRM, volume 1 part 7, page 11:
     77  *
     78  *     "With the existence of Shared Local Memory, a 64KB chunk from each of
     79  *      the 2 L3 banks will be reserved for SLM usage. The remaining cache
     80  *      space is divided between the remaining clients. SLM allocation is done
     81  *      via reducing the number of ways on the two banks from 64 to 32."
     82  *
     83  * From the Haswell PRM, volume 7, page 662:
     84  *
     85  *     "The configuration for {SLM = 128,URB = 128,DC = 0,RO = 256,IS = 0,C =
     86  *      0,T =0, SUM 512} was validated as a later supported configuration and
     87  *      can be utilized if desired. For this configuration, global atomics
     88  *      must be programmed to be in GTI."
     89  */
     90 static const struct gen7_l3_config gen7_l3_slm_configs[] = {
     91    /*       SLM   URB  Rest    DC    RO   I/S     C     T */
     92    [0] = {  128,  128,    0,  128,  128,    0,    0,    0, },
     93    [1] = {  128,  128,    0,   64,    0,   64,   64,   64, },
     94    [2] = {  128,  128,    0,   32,    0,   64,   32,  128, },
     95    [3] = {  128,  128,    0,   32,    0,  128,   32,   64, },
     96 
     97    [4] = {  128,  128,    0,    0,  256,    0,    0,    0, },
     98 };
     99 
    100 static void
    101 gen7_launch_grid_l3(struct ilo_render *r, bool use_slm)
    102 {
    103    uint32_t l3sqcreg1, l3cntlreg2, l3cntlreg3;
    104    const struct gen7_l3_config *conf;
    105 
    106    /*
    107     * This function mostly follows what beignet does.  I do not know why, for
    108     * example, CON4DCUNC should be reset.  I do not know if it should be set
    109     * again after launch_grid().
    110     */
    111 
    112    ILO_DEV_ASSERT(r->dev, 7, 7.5);
    113 
    114    if (use_slm)
    115       conf = &gen7_l3_slm_configs[1];
    116    else
    117       conf = &gen7_l3_non_slm_configs[4];
    118 
    119    /* unset GEN7_REG_L3SQCREG1_CON4DCUNC (without readback first) */
    120    if (ilo_dev_gen(r->dev) >= ILO_GEN(7.5)) {
    121       l3sqcreg1 = GEN75_REG_L3SQCREG1_SQGPCI_24 |
    122                   GEN75_REG_L3SQCREG1_SQHPCI_8;
    123    } else {
    124       l3sqcreg1 = GEN7_REG_L3SQCREG1_SQGHPCI_18_6;
    125    }
    126 
    127    l3cntlreg2 = (conf->dc / 8) << GEN7_REG_L3CNTLREG2_DCWASS__SHIFT |
    128                 (conf->ro / 8) << GEN7_REG_L3CNTLREG2_RDOCPL__SHIFT |
    129                 (conf->urb / 8) << GEN7_REG_L3CNTLREG2_URBALL__SHIFT;
    130 
    131    l3cntlreg3 = (conf->t / 8) << GEN7_REG_L3CNTLREG3_TXWYALL__SHIFT |
    132                 (conf->c / 8) << GEN7_REG_L3CNTLREG3_CTWYALL__SHIFT |
    133                 (conf->is / 8) << GEN7_REG_L3CNTLREG3_ISWYALL__SHIFT;
    134 
    135    if (conf->slm) {
    136       /*
    137        * From the Ivy Bridge PRM, volume 1 part 7, page 11:
    138        *
    139        *     "Note that URB needs to be set as low b/w client in SLM mode,
    140        *      else the hash will fail. This is a required s/w model."
    141        */
    142       l3cntlreg2 |= GEN7_REG_L3CNTLREG2_URBSLMB |
    143                     GEN7_REG_L3CNTLREG2_SLMMENB;
    144    }
    145 
    146    gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3SQCREG1, l3sqcreg1);
    147    gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG2, l3cntlreg2);
    148    gen6_MI_LOAD_REGISTER_IMM(r->builder, GEN7_REG_L3CNTLREG3, l3cntlreg3);
    149 }
    150 
    151 int
    152 ilo_render_get_launch_grid_commands_len(const struct ilo_render *render,
    153                                         const struct ilo_state_vector *vec)
    154 {
    155    static int len;
    156 
    157    ILO_DEV_ASSERT(render->dev, 7, 7.5);
    158 
    159    if (!len) {
    160       len +=
    161          GEN6_PIPELINE_SELECT__SIZE +
    162          GEN6_STATE_BASE_ADDRESS__SIZE +
    163          GEN6_MEDIA_VFE_STATE__SIZE +
    164          GEN6_MEDIA_CURBE_LOAD__SIZE +
    165          GEN6_MEDIA_INTERFACE_DESCRIPTOR_LOAD__SIZE +
    166          GEN6_MEDIA_STATE_FLUSH__SIZE;
    167 
    168       len += ilo_render_get_flush_len(render) * 3;
    169 
    170       if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) {
    171          len += GEN6_MI_LOAD_REGISTER_IMM__SIZE * 3 * 2;
    172          len += GEN7_GPGPU_WALKER__SIZE;
    173       }
    174    }
    175 
    176    return len;
    177 }
    178 
    179 void
    180 ilo_render_emit_launch_grid_commands(struct ilo_render *render,
    181                                      const struct ilo_state_vector *vec,
    182                                      const struct ilo_render_launch_grid_session *session)
    183 {
    184    const unsigned batch_used = ilo_builder_batch_used(render->builder);
    185    const uint32_t pcb = render->state.cs.PUSH_CONSTANT_BUFFER;
    186    const int pcb_size = render->state.cs.PUSH_CONSTANT_BUFFER_size;
    187    int simd_size;
    188    bool use_slm;
    189 
    190    ILO_DEV_ASSERT(render->dev, 7, 7.5);
    191 
    192    simd_size = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_SIMD_SIZE);
    193    use_slm = ilo_shader_get_kernel_param(vec->cs, ILO_KERNEL_CS_LOCAL_SIZE);
    194 
    195    ilo_render_emit_flush(render);
    196 
    197    if (ilo_dev_gen(render->dev) >= ILO_GEN(7)) {
    198       gen7_launch_grid_l3(render, use_slm);
    199       ilo_render_emit_flush(render);
    200 
    201       gen6_PIPELINE_SELECT(render->builder,
    202             GEN7_PIPELINE_SELECT_DW0_SELECT_GPGPU);
    203    } else {
    204       gen6_PIPELINE_SELECT(render->builder,
    205             GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA);
    206    }
    207 
    208    gen6_state_base_address(render->builder, true);
    209 
    210    gen6_MEDIA_VFE_STATE(render->builder, &session->compute);
    211 
    212    if (pcb_size)
    213       gen6_MEDIA_CURBE_LOAD(render->builder, pcb, pcb_size);
    214 
    215    gen6_MEDIA_INTERFACE_DESCRIPTOR_LOAD(render->builder,
    216          session->idrt, session->idrt_size);
    217 
    218    gen7_GPGPU_WALKER(render->builder, session->thread_group_offset,
    219          session->thread_group_dim, session->thread_group_size, simd_size);
    220 
    221    gen6_MEDIA_STATE_FLUSH(render->builder);
    222 
    223    if (ilo_dev_gen(render->dev) >= ILO_GEN(7) && use_slm) {
    224       ilo_render_emit_flush(render);
    225       gen7_launch_grid_l3(render, false);
    226    }
    227 
    228    assert(ilo_builder_batch_used(render->builder) <= batch_used +
    229          ilo_render_get_launch_grid_commands_len(render, vec));
    230 }
    231