Home | History | Annotate | Download | only in core
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 2012-2015 LunarG, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     22  * DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors:
     25  *    Chia-I Wu <olv (at) lunarg.com>
     26  */
     27 
     28 #include "ilo_debug.h"
     29 #include "ilo_state_shader.h"
     30 
     31 enum vertex_stage {
     32    STAGE_VS,
     33    STAGE_HS,
     34    STAGE_DS,
     35    STAGE_GS,
     36 };
     37 
     38 struct vertex_ff {
     39    uint8_t grf_start;
     40 
     41    uint8_t per_thread_scratch_space;
     42    uint32_t per_thread_scratch_size;
     43 
     44    uint8_t sampler_count;
     45    uint8_t surface_count;
     46    bool has_uav;
     47 
     48    uint8_t vue_read_offset;
     49    uint8_t vue_read_len;
     50 
     51    uint8_t user_clip_enables;
     52 };
     53 
     54 static bool
     55 vertex_validate_gen6_kernel(const struct ilo_dev *dev,
     56                             enum vertex_stage stage,
     57                             const struct ilo_state_shader_kernel_info *kernel)
     58 {
     59    /*
     60     * "Dispatch GRF Start Register for URB Data" is U4 for GS and U5 for
     61     * others.
     62     */
     63    const uint8_t max_grf_start = (stage == STAGE_GS) ? 16 : 32;
     64 
     65    ILO_DEV_ASSERT(dev, 6, 8);
     66 
     67    /* we do not want to save it */
     68    assert(!kernel->offset);
     69 
     70    assert(kernel->grf_start < max_grf_start);
     71 
     72    return true;
     73 }
     74 
     75 static bool
     76 vertex_validate_gen6_urb(const struct ilo_dev *dev,
     77                          enum vertex_stage stage,
     78                          const struct ilo_state_shader_urb_info *urb)
     79 {
     80    /* "Vertex/Patch URB Entry Read Offset" is U6, in pairs */
     81    const uint8_t max_read_base = 63 * 2;
     82    /*
     83     * "Vertex/Patch URB Entry Read Length" is limited to 64 for DS and U6 for
     84     * others, in pairs
     85     */
     86    const uint8_t max_read_count = ((stage == STAGE_DS) ? 64 : 63) * 2;
     87 
     88    ILO_DEV_ASSERT(dev, 6, 8);
     89 
     90    assert(urb->read_base + urb->read_count <= urb->cv_input_attr_count);
     91 
     92    assert(urb->read_base % 2 == 0 && urb->read_base <= max_read_base);
     93 
     94    /*
     95     * There is no need to worry about reading past entries, as URB entries are
     96     * aligned to 1024-bits (Gen6) or 512-bits (Gen7+).
     97     */
     98    assert(urb->read_count <= max_read_count);
     99 
    100    return true;
    101 }
    102 
    103 static bool
    104 vertex_get_gen6_ff(const struct ilo_dev *dev,
    105                    enum vertex_stage stage,
    106                    const struct ilo_state_shader_kernel_info *kernel,
    107                    const struct ilo_state_shader_resource_info *resource,
    108                    const struct ilo_state_shader_urb_info *urb,
    109                    uint32_t per_thread_scratch_size,
    110                    struct vertex_ff *ff)
    111 {
    112    ILO_DEV_ASSERT(dev, 6, 8);
    113 
    114    memset(ff, 0, sizeof(*ff));
    115 
    116    if (!vertex_validate_gen6_kernel(dev, stage, kernel) ||
    117        !vertex_validate_gen6_urb(dev, stage, urb))
    118       return false;
    119 
    120    ff->grf_start = kernel->grf_start;
    121 
    122    if (per_thread_scratch_size) {
    123       /*
    124        * From the Sandy Bridge PRM, volume 2 part 1, page 134:
    125        *
    126        *     "(Per-Thread Scratch Space)
    127        *      Range    [0,11] indicating [1K Bytes, 2M Bytes]"
    128        */
    129       assert(per_thread_scratch_size <= 2 * 1024 * 1024);
    130 
    131       /* next power of two, starting from 1KB */
    132       ff->per_thread_scratch_space = (per_thread_scratch_size > 1024) ?
    133          (util_last_bit(per_thread_scratch_size - 1) - 10) : 0;
    134       ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
    135    }
    136 
    137    ff->sampler_count = (resource->sampler_count <= 12) ?
    138       (resource->sampler_count + 3) / 4 : 4;
    139    ff->surface_count = resource->surface_count;
    140    ff->has_uav = resource->has_uav;
    141 
    142    ff->vue_read_offset = urb->read_base / 2;
    143    ff->vue_read_len = (urb->read_count + 1) / 2;
    144 
    145    /* need to read something unless VUE handles are included */
    146    switch (stage) {
    147    case STAGE_VS:
    148       if (!ff->vue_read_len)
    149          ff->vue_read_len = 1;
    150 
    151       /* one GRF per attribute */
    152       assert(kernel->grf_start + urb->read_count * 2 <= 128);
    153       break;
    154    case STAGE_GS:
    155       if (ilo_dev_gen(dev) == ILO_GEN(6) && !ff->vue_read_len)
    156          ff->vue_read_len = 1;
    157       break;
    158    default:
    159       break;
    160    }
    161 
    162    ff->user_clip_enables = urb->user_clip_enables;
    163 
    164    return true;
    165 }
    166 
    167 static uint16_t
    168 vs_get_gen6_thread_count(const struct ilo_dev *dev,
    169                          const struct ilo_state_vs_info *info)
    170 {
    171    uint16_t thread_count;
    172 
    173    ILO_DEV_ASSERT(dev, 6, 8);
    174 
    175    /* Maximum Number of Threads of 3DSTATE_VS */
    176    switch (ilo_dev_gen(dev)) {
    177    case ILO_GEN(8):
    178       thread_count = 504;
    179       break;
    180    case ILO_GEN(7.5):
    181       thread_count = (dev->gt >= 2) ? 280 : 70;
    182       break;
    183    case ILO_GEN(7):
    184    case ILO_GEN(6):
    185    default:
    186       thread_count = dev->thread_count;
    187       break;
    188    }
    189 
    190    return thread_count - 1;
    191 }
    192 
    193 static bool
    194 vs_set_gen6_3DSTATE_VS(struct ilo_state_vs *vs,
    195                        const struct ilo_dev *dev,
    196                        const struct ilo_state_vs_info *info)
    197 {
    198    struct vertex_ff ff;
    199    uint16_t thread_count;
    200    uint32_t dw2, dw3, dw4, dw5;
    201 
    202    ILO_DEV_ASSERT(dev, 6, 8);
    203 
    204    if (!vertex_get_gen6_ff(dev, STAGE_VS, &info->kernel, &info->resource,
    205             &info->urb, info->per_thread_scratch_size, &ff))
    206       return false;
    207 
    208    thread_count = vs_get_gen6_thread_count(dev, info);
    209 
    210    dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
    211          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
    212 
    213    if (false)
    214       dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
    215 
    216    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
    217       dw2 |= GEN75_THREADDISP_ACCESS_UAV;
    218 
    219    dw3 = ff.per_thread_scratch_space <<
    220       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
    221 
    222    dw4 = ff.grf_start << GEN6_VS_DW4_URB_GRF_START__SHIFT |
    223          ff.vue_read_len << GEN6_VS_DW4_URB_READ_LEN__SHIFT |
    224          ff.vue_read_offset << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT;
    225 
    226    dw5 = 0;
    227 
    228    if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
    229       dw5 |= thread_count << GEN75_VS_DW5_MAX_THREADS__SHIFT;
    230    else
    231       dw5 |= thread_count << GEN6_VS_DW5_MAX_THREADS__SHIFT;
    232 
    233    if (info->stats_enable)
    234       dw5 |= GEN6_VS_DW5_STATISTICS;
    235    if (info->dispatch_enable)
    236       dw5 |= GEN6_VS_DW5_VS_ENABLE;
    237 
    238    STATIC_ASSERT(ARRAY_SIZE(vs->vs) >= 5);
    239    vs->vs[0] = dw2;
    240    vs->vs[1] = dw3;
    241    vs->vs[2] = dw4;
    242    vs->vs[3] = dw5;
    243 
    244    if (ilo_dev_gen(dev) >= ILO_GEN(8))
    245       vs->vs[4] = ff.user_clip_enables << GEN8_VS_DW8_UCP_CLIP_ENABLES__SHIFT;
    246 
    247    vs->scratch_size = ff.per_thread_scratch_size * thread_count;
    248 
    249    return true;
    250 }
    251 
    252 static uint16_t
    253 hs_get_gen7_thread_count(const struct ilo_dev *dev,
    254                          const struct ilo_state_hs_info *info)
    255 {
    256    uint16_t thread_count;
    257 
    258    ILO_DEV_ASSERT(dev, 7, 8);
    259 
    260    /* Maximum Number of Threads of 3DSTATE_HS */
    261    switch (ilo_dev_gen(dev)) {
    262    case ILO_GEN(8):
    263       thread_count = 504;
    264       break;
    265    case ILO_GEN(7.5):
    266       thread_count = (dev->gt >= 2) ? 256 : 70;
    267       break;
    268    case ILO_GEN(7):
    269    default:
    270       thread_count = dev->thread_count;
    271       break;
    272    }
    273 
    274    return thread_count - 1;
    275 }
    276 
    277 static bool
    278 hs_set_gen7_3DSTATE_HS(struct ilo_state_hs *hs,
    279                        const struct ilo_dev *dev,
    280                        const struct ilo_state_hs_info *info)
    281 {
    282    struct vertex_ff ff;
    283    uint16_t thread_count;
    284    uint32_t dw1, dw2, dw4, dw5;
    285 
    286    ILO_DEV_ASSERT(dev, 7, 8);
    287 
    288    if (!vertex_get_gen6_ff(dev, STAGE_HS, &info->kernel, &info->resource,
    289             &info->urb, info->per_thread_scratch_size, &ff))
    290       return false;
    291 
    292    thread_count = hs_get_gen7_thread_count(dev, info);
    293 
    294    dw1 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
    295          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
    296 
    297    dw2 = 0 << GEN7_HS_DW2_INSTANCE_COUNT__SHIFT;
    298 
    299    if (ilo_dev_gen(dev) >= ILO_GEN(8))
    300       dw2 |= thread_count << GEN8_HS_DW2_MAX_THREADS__SHIFT;
    301    else if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
    302       dw1 |= thread_count << GEN75_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
    303    else
    304       dw1 |= thread_count << GEN7_HS_DW1_DISPATCH_MAX_THREADS__SHIFT;
    305 
    306    if (info->dispatch_enable)
    307       dw2 |= GEN7_HS_DW2_HS_ENABLE;
    308    if (info->stats_enable)
    309       dw2 |= GEN7_HS_DW2_STATISTICS;
    310 
    311    dw4 = ff.per_thread_scratch_space <<
    312       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
    313 
    314    dw5 = GEN7_HS_DW5_INCLUDE_VERTEX_HANDLES |
    315          ff.grf_start << GEN7_HS_DW5_URB_GRF_START__SHIFT |
    316          ff.vue_read_len << GEN7_HS_DW5_URB_READ_LEN__SHIFT |
    317          ff.vue_read_offset << GEN7_HS_DW5_URB_READ_OFFSET__SHIFT;
    318 
    319    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
    320       dw5 |= GEN75_HS_DW5_ACCESS_UAV;
    321 
    322    STATIC_ASSERT(ARRAY_SIZE(hs->hs) >= 4);
    323    hs->hs[0] = dw1;
    324    hs->hs[1] = dw2;
    325    hs->hs[2] = dw4;
    326    hs->hs[3] = dw5;
    327 
    328    hs->scratch_size = ff.per_thread_scratch_size * thread_count;
    329 
    330    return true;
    331 }
    332 
    333 static bool
    334 ds_set_gen7_3DSTATE_TE(struct ilo_state_ds *ds,
    335                        const struct ilo_dev *dev,
    336                        const struct ilo_state_ds_info *info)
    337 {
    338    uint32_t dw1;
    339 
    340    ILO_DEV_ASSERT(dev, 7, 8);
    341 
    342    dw1 = 0;
    343 
    344    if (info->dispatch_enable) {
    345       dw1 |= GEN7_TE_DW1_MODE_HW |
    346              GEN7_TE_DW1_TE_ENABLE;
    347    }
    348 
    349    STATIC_ASSERT(ARRAY_SIZE(ds->te) >= 3);
    350    ds->te[0] = dw1;
    351    ds->te[1] = fui(63.0f);
    352    ds->te[2] = fui(64.0f);
    353 
    354    return true;
    355 }
    356 
    357 static uint16_t
    358 ds_get_gen7_thread_count(const struct ilo_dev *dev,
    359                          const struct ilo_state_ds_info *info)
    360 {
    361    uint16_t thread_count;
    362 
    363    ILO_DEV_ASSERT(dev, 7, 8);
    364 
    365    /* Maximum Number of Threads of 3DSTATE_DS */
    366    switch (ilo_dev_gen(dev)) {
    367    case ILO_GEN(8):
    368       thread_count = 504;
    369       break;
    370    case ILO_GEN(7.5):
    371       thread_count = (dev->gt >= 2) ? 280 : 70;
    372       break;
    373    case ILO_GEN(7):
    374    default:
    375       thread_count = dev->thread_count;
    376       break;
    377    }
    378 
    379    return thread_count - 1;
    380 }
    381 
    382 static bool
    383 ds_set_gen7_3DSTATE_DS(struct ilo_state_ds *ds,
    384                        const struct ilo_dev *dev,
    385                        const struct ilo_state_ds_info *info)
    386 {
    387    struct vertex_ff ff;
    388    uint16_t thread_count;
    389    uint32_t dw2, dw3, dw4, dw5;
    390 
    391    ILO_DEV_ASSERT(dev, 7, 8);
    392 
    393    if (!vertex_get_gen6_ff(dev, STAGE_DS, &info->kernel, &info->resource,
    394             &info->urb, info->per_thread_scratch_size, &ff))
    395       return false;
    396 
    397    thread_count = ds_get_gen7_thread_count(dev, info);
    398 
    399    dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
    400          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
    401 
    402    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
    403       dw2 |= GEN75_THREADDISP_ACCESS_UAV;
    404 
    405    dw3 = ff.per_thread_scratch_space <<
    406       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
    407 
    408    dw4 = ff.grf_start << GEN7_DS_DW4_URB_GRF_START__SHIFT |
    409          ff.vue_read_len << GEN7_DS_DW4_URB_READ_LEN__SHIFT |
    410          ff.vue_read_offset << GEN7_DS_DW4_URB_READ_OFFSET__SHIFT;
    411 
    412    dw5 = 0;
    413 
    414    if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
    415       dw5 |= thread_count << GEN75_DS_DW5_MAX_THREADS__SHIFT;
    416    else
    417       dw5 |= thread_count << GEN7_DS_DW5_MAX_THREADS__SHIFT;
    418 
    419    if (info->stats_enable)
    420       dw5 |= GEN7_DS_DW5_STATISTICS;
    421    if (info->dispatch_enable)
    422       dw5 |= GEN7_DS_DW5_DS_ENABLE;
    423 
    424    STATIC_ASSERT(ARRAY_SIZE(ds->ds) >= 5);
    425    ds->ds[0] = dw2;
    426    ds->ds[1] = dw3;
    427    ds->ds[2] = dw4;
    428    ds->ds[3] = dw5;
    429 
    430    if (ilo_dev_gen(dev) >= ILO_GEN(8))
    431       ds->ds[4] = ff.user_clip_enables << GEN8_DS_DW8_UCP_CLIP_ENABLES__SHIFT;
    432 
    433    ds->scratch_size = ff.per_thread_scratch_size * thread_count;
    434 
    435    return true;
    436 }
    437 
    438 static bool
    439 gs_get_gen6_ff(const struct ilo_dev *dev,
    440                const struct ilo_state_gs_info *info,
    441                struct vertex_ff *ff)
    442 {
    443    const struct ilo_state_shader_urb_info *urb = &info->urb;
    444    const struct ilo_state_gs_sol_info *sol = &info->sol;
    445 
    446    ILO_DEV_ASSERT(dev, 6, 8);
    447 
    448    if (!vertex_get_gen6_ff(dev, STAGE_GS, &info->kernel, &info->resource,
    449             &info->urb, info->per_thread_scratch_size, ff))
    450       return false;
    451 
    452    /*
    453     * From the Ivy Bridge PRM, volume 2 part 1, page 168-169:
    454     *
    455     *     "[0,62] indicating [1,63] 16B units"
    456     *
    457     *     "Programming Restrictions: The vertex size must be programmed as a
    458     *      multiple of 32B units with the following exception: Rendering is
    459     *      disabled (as per SOL stage state) and the vertex size output by the
    460     *      GS thread is 16B.
    461     *
    462     *      If rendering is enabled (as per SOL state) the vertex size must be
    463     *      programmed as a multiple of 32B units. In other words, the only
    464     *      time software can program a vertex size with an odd number of 16B
    465     *      units is when rendering is disabled."
    466     */
    467    assert(urb->output_attr_count <= 63);
    468    if (!sol->render_disable)
    469       assert(urb->output_attr_count % 2 == 0);
    470 
    471    return true;
    472 }
    473 
    474 static uint16_t
    475 gs_get_gen6_thread_count(const struct ilo_dev *dev,
    476                          const struct ilo_state_gs_info *info)
    477 {
    478    const struct ilo_state_gs_sol_info *sol = &info->sol;
    479    uint16_t thread_count;
    480 
    481    ILO_DEV_ASSERT(dev, 6, 8);
    482 
    483    /* Maximum Number of Threads of 3DSTATE_GS */
    484    switch (ilo_dev_gen(dev)) {
    485    case ILO_GEN(8):
    486       thread_count = 504;
    487       break;
    488    case ILO_GEN(7.5):
    489       thread_count = (dev->gt >= 2) ? 256 : 70;
    490       break;
    491    case ILO_GEN(7):
    492    case ILO_GEN(6):
    493    default:
    494       thread_count = dev->thread_count;
    495 
    496       /*
    497        * From the Sandy Bridge PRM, volume 2 part 1, page 154:
    498        *
    499        *     "Maximum Number of Threads valid range is [0,27] when Rendering
    500        *      Enabled bit is set."
    501        *
    502        * According to the classic driver, [0, 20] for GT1.
    503        */
    504       if (!sol->render_disable)
    505          thread_count = (dev->gt == 2) ? 27 : 20;
    506       break;
    507    }
    508 
    509    return thread_count - 1;
    510 }
    511 
    512 static bool
    513 gs_set_gen6_3DSTATE_GS(struct ilo_state_gs *gs,
    514                        const struct ilo_dev *dev,
    515                        const struct ilo_state_gs_info *info)
    516 {
    517    const struct ilo_state_gs_sol_info *sol = &info->sol;
    518    struct vertex_ff ff;
    519    uint16_t thread_count;
    520    uint32_t dw2, dw3, dw4, dw5, dw6;
    521 
    522    ILO_DEV_ASSERT(dev, 6, 6);
    523 
    524    if (!gs_get_gen6_ff(dev, info, &ff))
    525       return false;
    526 
    527    thread_count = gs_get_gen6_thread_count(dev, info);
    528 
    529    dw2 = GEN6_THREADDISP_SPF |
    530          ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
    531          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
    532 
    533    dw3 = ff.per_thread_scratch_space <<
    534       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
    535 
    536    dw4 = ff.vue_read_len << GEN6_GS_DW4_URB_READ_LEN__SHIFT |
    537          ff.vue_read_offset << GEN6_GS_DW4_URB_READ_OFFSET__SHIFT |
    538          ff.grf_start << GEN6_GS_DW4_URB_GRF_START__SHIFT;
    539 
    540    dw5 = thread_count << GEN6_GS_DW5_MAX_THREADS__SHIFT;
    541 
    542    if (info->stats_enable)
    543       dw5 |= GEN6_GS_DW5_STATISTICS;
    544    if (sol->stats_enable)
    545       dw5 |= GEN6_GS_DW5_SO_STATISTICS;
    546    if (!sol->render_disable)
    547       dw5 |= GEN6_GS_DW5_RENDER_ENABLE;
    548 
    549    dw6 = 0;
    550 
    551    /* GEN7_REORDER_TRAILING is handled by the kernel */
    552    if (sol->tristrip_reorder == GEN7_REORDER_LEADING)
    553       dw6 |= GEN6_GS_DW6_REORDER_LEADING_ENABLE;
    554 
    555    if (sol->sol_enable) {
    556       dw6 |= GEN6_GS_DW6_SVBI_PAYLOAD_ENABLE;
    557 
    558       if (sol->svbi_post_inc) {
    559          dw6 |= GEN6_GS_DW6_SVBI_POST_INC_ENABLE |
    560                 sol->svbi_post_inc << GEN6_GS_DW6_SVBI_POST_INC_VAL__SHIFT;
    561       }
    562    }
    563 
    564    if (info->dispatch_enable)
    565       dw6 |= GEN6_GS_DW6_GS_ENABLE;
    566 
    567    STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
    568    gs->gs[0] = dw2;
    569    gs->gs[1] = dw3;
    570    gs->gs[2] = dw4;
    571    gs->gs[3] = dw5;
    572    gs->gs[4] = dw6;
    573 
    574    gs->scratch_size = ff.per_thread_scratch_size * thread_count;
    575 
    576    return true;
    577 }
    578 
    579 static uint8_t
    580 gs_get_gen7_vertex_size(const struct ilo_dev *dev,
    581                         const struct ilo_state_gs_info *info)
    582 {
    583    const struct ilo_state_shader_urb_info *urb = &info->urb;
    584 
    585    ILO_DEV_ASSERT(dev, 7, 8);
    586 
    587    return (urb->output_attr_count) ? urb->output_attr_count - 1 : 0;
    588 }
    589 
    590 static bool
    591 gs_set_gen7_3DSTATE_GS(struct ilo_state_gs *gs,
    592                        const struct ilo_dev *dev,
    593                        const struct ilo_state_gs_info *info)
    594 {
    595    struct vertex_ff ff;
    596    uint16_t thread_count;
    597    uint8_t vertex_size;
    598    uint32_t dw2, dw3, dw4, dw5;
    599 
    600    ILO_DEV_ASSERT(dev, 7, 8);
    601 
    602    if (!gs_get_gen6_ff(dev, info, &ff))
    603       return false;
    604 
    605    thread_count = gs_get_gen6_thread_count(dev, info);
    606    vertex_size = gs_get_gen7_vertex_size(dev, info);
    607 
    608    dw2 = ff.sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
    609          ff.surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
    610 
    611    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff.has_uav)
    612       dw2 |= GEN75_THREADDISP_ACCESS_UAV;
    613 
    614    dw3 = ff.per_thread_scratch_space <<
    615       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
    616 
    617    dw4 = vertex_size << GEN7_GS_DW4_OUTPUT_SIZE__SHIFT |
    618          0 << GEN7_GS_DW4_OUTPUT_TOPO__SHIFT |
    619          ff.vue_read_len << GEN7_GS_DW4_URB_READ_LEN__SHIFT |
    620          GEN7_GS_DW4_INCLUDE_VERTEX_HANDLES |
    621          ff.vue_read_offset << GEN7_GS_DW4_URB_READ_OFFSET__SHIFT |
    622          ff.grf_start << GEN7_GS_DW4_URB_GRF_START__SHIFT;
    623 
    624    dw5 = 0;
    625 
    626    if (ilo_dev_gen(dev) >= ILO_GEN(7.5))
    627       dw5 = thread_count << GEN75_GS_DW5_MAX_THREADS__SHIFT;
    628    else
    629       dw5 = thread_count << GEN7_GS_DW5_MAX_THREADS__SHIFT;
    630 
    631    if (info->stats_enable)
    632       dw5 |= GEN7_GS_DW5_STATISTICS;
    633    if (info->dispatch_enable)
    634       dw5 |= GEN7_GS_DW5_GS_ENABLE;
    635 
    636    STATIC_ASSERT(ARRAY_SIZE(gs->gs) >= 5);
    637    gs->gs[0] = dw2;
    638    gs->gs[1] = dw3;
    639    gs->gs[2] = dw4;
    640    gs->gs[3] = dw5;
    641 
    642    if (ilo_dev_gen(dev) >= ILO_GEN(8))
    643       gs->gs[4] = ff.user_clip_enables << GEN8_GS_DW9_UCP_CLIP_ENABLES__SHIFT;
    644 
    645    gs->scratch_size = ff.per_thread_scratch_size * thread_count;
    646 
    647    return true;
    648 }
    649 
    650 bool
    651 ilo_state_vs_init(struct ilo_state_vs *vs,
    652                   const struct ilo_dev *dev,
    653                   const struct ilo_state_vs_info *info)
    654 {
    655    bool ret = true;
    656 
    657    assert(ilo_is_zeroed(vs, sizeof(*vs)));
    658 
    659    ret &= vs_set_gen6_3DSTATE_VS(vs, dev, info);
    660 
    661    assert(ret);
    662 
    663    return ret;
    664 }
    665 
    666 bool
    667 ilo_state_vs_init_disabled(struct ilo_state_vs *vs,
    668                            const struct ilo_dev *dev)
    669 {
    670    struct ilo_state_vs_info info;
    671 
    672    memset(&info, 0, sizeof(info));
    673 
    674    return ilo_state_vs_init(vs, dev, &info);
    675 }
    676 
    677 bool
    678 ilo_state_hs_init(struct ilo_state_hs *hs,
    679                   const struct ilo_dev *dev,
    680                   const struct ilo_state_hs_info *info)
    681 {
    682    bool ret = true;
    683 
    684    assert(ilo_is_zeroed(hs, sizeof(*hs)));
    685 
    686    if (ilo_dev_gen(dev) >= ILO_GEN(7))
    687       ret &= hs_set_gen7_3DSTATE_HS(hs, dev, info);
    688 
    689    assert(ret);
    690 
    691    return ret;
    692 }
    693 
    694 bool
    695 ilo_state_hs_init_disabled(struct ilo_state_hs *hs,
    696                            const struct ilo_dev *dev)
    697 {
    698    struct ilo_state_hs_info info;
    699 
    700    memset(&info, 0, sizeof(info));
    701 
    702    return ilo_state_hs_init(hs, dev, &info);
    703 }
    704 
    705 bool
    706 ilo_state_ds_init(struct ilo_state_ds *ds,
    707                   const struct ilo_dev *dev,
    708                   const struct ilo_state_ds_info *info)
    709 {
    710    bool ret = true;
    711 
    712    assert(ilo_is_zeroed(ds, sizeof(*ds)));
    713 
    714    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
    715       ret &= ds_set_gen7_3DSTATE_TE(ds, dev, info);
    716       ret &= ds_set_gen7_3DSTATE_DS(ds, dev, info);
    717    }
    718 
    719    assert(ret);
    720 
    721    return ret;
    722 }
    723 
    724 bool
    725 ilo_state_ds_init_disabled(struct ilo_state_ds *ds,
    726                            const struct ilo_dev *dev)
    727 {
    728    struct ilo_state_ds_info info;
    729 
    730    memset(&info, 0, sizeof(info));
    731 
    732    return ilo_state_ds_init(ds, dev, &info);
    733 }
    734 
    735 bool
    736 ilo_state_gs_init(struct ilo_state_gs *gs,
    737                   const struct ilo_dev *dev,
    738                   const struct ilo_state_gs_info *info)
    739 {
    740    bool ret = true;
    741 
    742    assert(ilo_is_zeroed(gs, sizeof(*gs)));
    743 
    744    if (ilo_dev_gen(dev) >= ILO_GEN(7))
    745       ret &= gs_set_gen7_3DSTATE_GS(gs, dev, info);
    746    else
    747       ret &= gs_set_gen6_3DSTATE_GS(gs, dev, info);
    748 
    749    assert(ret);
    750 
    751    return ret;
    752 }
    753 
    754 bool
    755 ilo_state_gs_init_disabled(struct ilo_state_gs *gs,
    756                            const struct ilo_dev *dev)
    757 {
    758    struct ilo_state_gs_info info;
    759 
    760    memset(&info, 0, sizeof(info));
    761 
    762    return ilo_state_gs_init(gs, dev, &info);
    763 }
    764