Home | History | Annotate | Download | only in core
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 2012-2015 LunarG, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     22  * DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors:
     25  *    Chia-I Wu <olv (at) lunarg.com>
     26  */
     27 
     28 #include "ilo_debug.h"
     29 #include "ilo_state_shader.h"
     30 
     31 struct pixel_ff {
     32    uint8_t dispatch_modes;
     33 
     34    uint32_t kernel_offsets[3];
     35    uint8_t grf_starts[3];
     36    bool pcb_enable;
     37    uint8_t per_thread_scratch_space;
     38    uint32_t per_thread_scratch_size;
     39 
     40    uint8_t sampler_count;
     41    uint8_t surface_count;
     42    bool has_uav;
     43 
     44    uint16_t thread_count;
     45 
     46    struct ilo_state_ps_dispatch_conds conds;
     47 
     48    bool kill_pixel;
     49    bool dispatch_enable;
     50    bool dual_source_blending;
     51    uint32_t sample_mask;
     52 };
     53 
     54 static bool
     55 ps_kernel_validate_gen6(const struct ilo_dev *dev,
     56                         const struct ilo_state_shader_kernel_info *kernel)
     57 {
     58    /* "Dispatch GRF Start Register for Constant/Setup Data" is U7 */
     59    const uint8_t max_grf_start = 128;
     60 
     61    ILO_DEV_ASSERT(dev, 6, 8);
     62 
     63    /* "Kernel Start Pointer" is 64-byte aligned */
     64    assert(kernel->offset % 64 == 0);
     65 
     66    assert(kernel->grf_start < max_grf_start);
     67 
     68    return true;
     69 }
     70 
     71 static bool
     72 ps_validate_gen6(const struct ilo_dev *dev,
     73                  const struct ilo_state_ps_info *info)
     74 {
     75    const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
     76    const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
     77    const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
     78    const struct ilo_state_ps_io_info *io = &info->io;
     79 
     80    ILO_DEV_ASSERT(dev, 6, 8);
     81 
     82    if (!ps_kernel_validate_gen6(dev, kernel_8) ||
     83        !ps_kernel_validate_gen6(dev, kernel_16) ||
     84        !ps_kernel_validate_gen6(dev, kernel_32))
     85       return false;
     86 
     87    /* unsupported on Gen6 */
     88    if (ilo_dev_gen(dev) == ILO_GEN(6))
     89       assert(!io->use_coverage_mask);
     90 
     91    /*
     92     * From the Sandy Bridge PRM, volume 2 part 1, page 275:
     93     *
     94     *     "If a NULL Depth Buffer is selected, the Pixel Shader Computed Depth
     95     *      field must be set to disabled."
     96     */
     97    if (ilo_dev_gen(dev) == ILO_GEN(6) && io->pscdepth != GEN7_PSCDEPTH_OFF)
     98       assert(info->cv_has_depth_buffer);
     99 
    100    if (!info->per_sample_dispatch) {
    101       /*
    102        * From the Sandy Bridge PRM, volume 2 part 1, page 281:
    103        *
    104        *     "MSDISPMODE_PERSAMPLE is required in order to select
    105        *      POSOFFSET_SAMPLE."
    106        */
    107       assert(io->posoffset != GEN6_POSOFFSET_SAMPLE);
    108 
    109       /*
    110        * From the Sandy Bridge PRM, volume 2 part 1, page 282:
    111        *
    112        *     "MSDISPMODE_PERSAMPLE is required in order to select
    113        *      INTERP_SAMPLE."
    114        *
    115        * From the Sandy Bridge PRM, volume 2 part 1, page 283:
    116        *
    117        *     "MSDISPMODE_PERSAMPLE is required in order to select Perspective
    118        *      Sample or Non-perspective Sample barycentric coordinates."
    119        */
    120       assert(!info->cv_per_sample_interp);
    121    }
    122 
    123    /*
    124     *
    125     * From the Sandy Bridge PRM, volume 2 part 1, page 314:
    126     *
    127     *     "Pixel Shader Dispatch, Alpha... must all be disabled."
    128     *
    129     * Simply disallow any valid kernel when there is early-z op.  Also, when
    130     * there is no valid kernel, io should be zeroed.
    131     */
    132    if (info->valid_kernels)
    133       assert(!info->cv_has_earlyz_op);
    134    else
    135       assert(ilo_is_zeroed(io, sizeof(*io)));
    136 
    137    return true;
    138 }
    139 
    140 static uint8_t
    141 ps_get_gen6_dispatch_modes(const struct ilo_dev *dev,
    142                            const struct ilo_state_ps_info *info)
    143 {
    144    const struct ilo_state_ps_io_info *io = &info->io;
    145    uint8_t dispatch_modes = info->valid_kernels;
    146 
    147    ILO_DEV_ASSERT(dev, 6, 8);
    148 
    149    if (!dispatch_modes)
    150       return 0;
    151 
    152    /*
    153     * From the Sandy Bridge PRM, volume 2 part 1, page 334:
    154     *
    155     *     "Not valid on [DevSNB] if 4x PERPIXEL mode with pixel shader
    156     *      computed depth."
    157     *
    158     *     "Valid on all products, except when in non-1x PERSAMPLE mode
    159     *      (applies to [DevSNB+] only)"
    160     *
    161     * From the Sandy Bridge PRM, volume 4 part 1, page 239:
    162     *
    163     *     "[DevSNB]: When Pixel Shader outputs oDepth and PS invocation mode
    164     *      is PERPIXEL, Message Type for Render Target Write must be SIMD8.
    165     *
    166     *      Errata: [DevSNB+]: When Pixel Shader outputs oMask, this message
    167     *      type is not supported: SIMD8 (including SIMD8_DUALSRC_xx)."
    168     *
    169     * It is really hard to follow what combinations are valid on what
    170     * platforms.  Judging from the restrictions on RT write messages on Gen6,
    171     * oDepth and oMask related issues should be Gen6-specific.  PERSAMPLE
    172     * issue should be universal, and disallows multiple dispatch modes.
    173     */
    174    if (ilo_dev_gen(dev) == ILO_GEN(6)) {
    175       if (io->pscdepth != GEN7_PSCDEPTH_OFF && !info->per_sample_dispatch)
    176          dispatch_modes &= GEN6_PS_DISPATCH_8;
    177       if (io->write_omask)
    178          dispatch_modes &= ~GEN6_PS_DISPATCH_8;
    179    }
    180    if (info->per_sample_dispatch && !info->sample_count_one) {
    181       /* prefer 32 over 16 over 8 */
    182       if (dispatch_modes & GEN6_PS_DISPATCH_32)
    183          dispatch_modes &= GEN6_PS_DISPATCH_32;
    184       else if (dispatch_modes & GEN6_PS_DISPATCH_16)
    185          dispatch_modes &= GEN6_PS_DISPATCH_16;
    186       else
    187          dispatch_modes &= GEN6_PS_DISPATCH_8;
    188    }
    189 
    190    /*
    191     * From the Broadwell PRM, volume 2b, page 149:
    192     *
    193     *     "When Render Target Fast Clear Enable is ENABLED or Render Target
    194     *      Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit (8 Pixel
    195     *      Dispatch or Dual-8 Pixel Dispatch Enable) must be DISABLED."
    196     */
    197    if (info->rt_clear_enable || info->rt_resolve_enable)
    198       dispatch_modes &= ~GEN6_PS_DISPATCH_8;
    199 
    200    assert(dispatch_modes);
    201 
    202    return dispatch_modes;
    203 }
    204 
    205 static uint16_t
    206 ps_get_gen6_thread_count(const struct ilo_dev *dev,
    207                          const struct ilo_state_ps_info *info)
    208 {
    209    uint16_t thread_count;
    210 
    211    ILO_DEV_ASSERT(dev, 6, 8);
    212 
    213    /* Maximum Number of Threads of 3DSTATE_PS */
    214    switch (ilo_dev_gen(dev)) {
    215    case ILO_GEN(8):
    216       /* scaled automatically */
    217       thread_count = 64 - 1;
    218       break;
    219    case ILO_GEN(7.5):
    220       thread_count = (dev->gt == 3) ? 408 :
    221                      (dev->gt == 2) ? 204 : 102;
    222       break;
    223    case ILO_GEN(7):
    224       thread_count = (dev->gt == 2) ? 172 : 48;
    225       break;
    226    case ILO_GEN(6):
    227    default:
    228       /* from the classic driver instead of the PRM */
    229       thread_count = (dev->gt == 2) ? 80 : 40;
    230       break;
    231    }
    232 
    233    return thread_count - 1;
    234 }
    235 
    236 static bool
    237 ps_params_get_gen6_kill_pixel(const struct ilo_dev *dev,
    238                               const struct ilo_state_ps_params_info *params,
    239                               const struct ilo_state_ps_dispatch_conds *conds)
    240 {
    241    ILO_DEV_ASSERT(dev, 6, 8);
    242 
    243    /*
    244     * From the Sandy Bridge PRM, volume 2 part 1, page 275:
    245     *
    246     *     "This bit (Pixel Shader Kill Pixel), if ENABLED, indicates that the
    247     *      PS kernel or color calculator has the ability to kill (discard)
    248     *      pixels or samples, other than due to depth or stencil testing.
    249     *      This bit is required to be ENABLED in the following situations:
    250     *
    251     *      The API pixel shader program contains "killpix" or "discard"
    252     *      instructions, or other code in the pixel shader kernel that can
    253     *      cause the final pixel mask to differ from the pixel mask received
    254     *      on dispatch.
    255     *
    256     *      A sampler with chroma key enabled with kill pixel mode is used by
    257     *      the pixel shader.
    258     *
    259     *      Any render target has Alpha Test Enable or AlphaToCoverage Enable
    260     *      enabled.
    261     *
    262     *      The pixel shader kernel generates and outputs oMask.
    263     *
    264     *      Note: As ClipDistance clipping is fully supported in hardware and
    265     *      therefore not via PS instructions, there should be no need to
    266     *      ENABLE this bit due to ClipDistance clipping."
    267     */
    268    return (conds->ps_may_kill || params->alpha_may_kill);
    269 }
    270 
    271 static bool
    272 ps_params_get_gen6_dispatch_enable(const struct ilo_dev *dev,
    273                                    const struct ilo_state_ps_params_info *params,
    274                                    const struct ilo_state_ps_dispatch_conds *conds)
    275 {
    276    /*
    277     * We want to skip dispatching when EarlyZ suffices.  The conditions that
    278     * require dispatching are
    279     *
    280     *  - PS writes RTs and RTs are writeable
    281     *  - PS changes depth value and depth test/write is enabled
    282     *  - PS changes stencil value and stencil test is enabled
    283     *  - PS writes UAVs
    284     *  - PS or CC kills pixels
    285     *  - EDSC is PSEXEC, and depth test/write or stencil test is enabled
    286     */
    287    bool dispatch_required =
    288       ((conds->has_rt_write && params->has_writeable_rt) ||
    289        conds->write_odepth ||
    290        conds->write_ostencil ||
    291        conds->has_uav_write ||
    292        ps_params_get_gen6_kill_pixel(dev, params, conds) ||
    293        params->earlyz_control_psexec);
    294 
    295    ILO_DEV_ASSERT(dev, 6, 8);
    296 
    297    /*
    298     * From the Ivy Bridge PRM, volume 2 part 1, page 280:
    299     *
    300     *     "If EDSC_PSEXEC mode is selected, Thread Dispatch Enable must be
    301     *      set."
    302     */
    303    if (ilo_dev_gen(dev) < ILO_GEN(8) && params->earlyz_control_psexec)
    304       dispatch_required = true;
    305 
    306    /* assert it is valid to dispatch */
    307    if (dispatch_required)
    308       assert(conds->ps_valid);
    309 
    310    return dispatch_required;
    311 }
    312 
    313 static bool
    314 ps_get_gen6_ff_kernels(const struct ilo_dev *dev,
    315                        const struct ilo_state_ps_info *info,
    316                        struct pixel_ff *ff)
    317 {
    318    const struct ilo_state_shader_kernel_info *kernel_8 = &info->kernel_8;
    319    const struct ilo_state_shader_kernel_info *kernel_16 = &info->kernel_16;
    320    const struct ilo_state_shader_kernel_info *kernel_32 = &info->kernel_32;
    321 
    322    ILO_DEV_ASSERT(dev, 6, 8);
    323 
    324    ff->dispatch_modes = ps_get_gen6_dispatch_modes(dev, info);
    325 
    326    /* initialize kernel offsets and GRF starts */
    327    if (util_is_power_of_two(ff->dispatch_modes)) {
    328       if (ff->dispatch_modes & GEN6_PS_DISPATCH_8) {
    329          ff->kernel_offsets[0] = kernel_8->offset;
    330          ff->grf_starts[0] = kernel_8->grf_start;
    331       } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_16) {
    332          ff->kernel_offsets[0] = kernel_16->offset;
    333          ff->grf_starts[0] = kernel_16->grf_start;
    334       } else if (ff->dispatch_modes & GEN6_PS_DISPATCH_32) {
    335          ff->kernel_offsets[0] = kernel_32->offset;
    336          ff->grf_starts[0] = kernel_32->grf_start;
    337       }
    338    } else {
    339       ff->kernel_offsets[0] = kernel_8->offset;
    340       ff->kernel_offsets[1] = kernel_32->offset;
    341       ff->kernel_offsets[2] = kernel_16->offset;
    342 
    343       ff->grf_starts[0] = kernel_8->grf_start;
    344       ff->grf_starts[1] = kernel_32->grf_start;
    345       ff->grf_starts[2] = kernel_16->grf_start;
    346    }
    347 
    348    /* we do not want to save it */
    349    assert(ff->kernel_offsets[0] == 0);
    350 
    351    ff->pcb_enable = (((ff->dispatch_modes & GEN6_PS_DISPATCH_8) &&
    352                       kernel_8->pcb_attr_count) ||
    353                      ((ff->dispatch_modes & GEN6_PS_DISPATCH_16) &&
    354                       kernel_16->pcb_attr_count) ||
    355                      ((ff->dispatch_modes & GEN6_PS_DISPATCH_32) &&
    356                       kernel_32->pcb_attr_count));
    357 
    358    /* GPU hangs on Haswell if none of the dispatch mode bits is set */
    359    if (ilo_dev_gen(dev) == ILO_GEN(7.5) && !ff->dispatch_modes)
    360       ff->dispatch_modes |= GEN6_PS_DISPATCH_8;
    361 
    362    return true;
    363 }
    364 
    365 static bool
    366 ps_get_gen6_ff(const struct ilo_dev *dev,
    367                const struct ilo_state_ps_info *info,
    368                struct pixel_ff *ff)
    369 {
    370    const struct ilo_state_shader_resource_info *resource = &info->resource;
    371    const struct ilo_state_ps_io_info *io = &info->io;
    372    const struct ilo_state_ps_params_info *params = &info->params;
    373 
    374    ILO_DEV_ASSERT(dev, 6, 8);
    375 
    376    memset(ff, 0, sizeof(*ff));
    377 
    378    if (!ps_validate_gen6(dev, info) || !ps_get_gen6_ff_kernels(dev, info, ff))
    379       return false;
    380 
    381    if (info->per_thread_scratch_size) {
    382       /*
    383        * From the Sandy Bridge PRM, volume 2 part 1, page 271:
    384        *
    385        *     "(Per-Thread Scratch Space)
    386        *      Range  [0,11] indicating [1k bytes, 2M bytes] in powers of two"
    387        */
    388       assert(info->per_thread_scratch_size <= 2 * 1024 * 1024);
    389 
    390       /* next power of two, starting from 1KB */
    391       ff->per_thread_scratch_space = (info->per_thread_scratch_size > 1024) ?
    392          (util_last_bit(info->per_thread_scratch_size - 1) - 10) : 0;
    393       ff->per_thread_scratch_size = 1 << (10 + ff->per_thread_scratch_space);
    394    }
    395 
    396    ff->sampler_count = (resource->sampler_count <= 12) ?
    397       (resource->sampler_count + 3) / 4 : 4;
    398    ff->surface_count = resource->surface_count;
    399    ff->has_uav = resource->has_uav;
    400 
    401    ff->thread_count = ps_get_gen6_thread_count(dev, info);
    402 
    403    ff->conds.ps_valid = (info->valid_kernels != 0x0);
    404    ff->conds.has_rt_write = io->has_rt_write;
    405    ff->conds.write_odepth = (io->pscdepth != GEN7_PSCDEPTH_OFF);
    406    ff->conds.write_ostencil = false;
    407    ff->conds.has_uav_write = resource->has_uav;
    408    ff->conds.ps_may_kill = (io->write_pixel_mask || io->write_omask);
    409 
    410    ff->kill_pixel = ps_params_get_gen6_kill_pixel(dev, params, &ff->conds);
    411    ff->dispatch_enable =
    412       ps_params_get_gen6_dispatch_enable(dev, params, &ff->conds);
    413    ff->dual_source_blending = params->dual_source_blending;
    414    ff->sample_mask = params->sample_mask;
    415 
    416    return true;
    417 }
    418 
    419 static bool
    420 ps_set_gen6_3dstate_wm(struct ilo_state_ps *ps,
    421                        const struct ilo_dev *dev,
    422                        const struct ilo_state_ps_info *info,
    423                        const struct pixel_ff *ff)
    424 {
    425    const struct ilo_state_ps_io_info *io = &info->io;
    426    uint32_t dw2, dw3, dw4, dw5, dw6;
    427 
    428    ILO_DEV_ASSERT(dev, 6, 6);
    429 
    430    dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
    431          ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
    432 
    433    if (false)
    434       dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
    435 
    436    dw3 = ff->per_thread_scratch_space <<
    437       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
    438 
    439    dw4 = ff->grf_starts[0] << GEN6_WM_DW4_URB_GRF_START0__SHIFT |
    440          ff->grf_starts[1] << GEN6_WM_DW4_URB_GRF_START1__SHIFT |
    441          ff->grf_starts[2] << GEN6_WM_DW4_URB_GRF_START2__SHIFT;
    442 
    443    dw5 = ff->thread_count << GEN6_WM_DW5_MAX_THREADS__SHIFT |
    444          ff->dispatch_modes << GEN6_WM_DW5_PS_DISPATCH_MODE__SHIFT;
    445 
    446    if (ff->kill_pixel)
    447       dw5 |= GEN6_WM_DW5_PS_KILL_PIXEL;
    448 
    449    if (io->pscdepth != GEN7_PSCDEPTH_OFF)
    450       dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH;
    451    if (io->use_z)
    452       dw5 |= GEN6_WM_DW5_PS_USE_DEPTH;
    453 
    454    if (ff->dispatch_enable)
    455       dw5 |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
    456 
    457    if (io->write_omask)
    458       dw5 |= GEN6_WM_DW5_PS_COMPUTE_OMASK;
    459    if (io->use_w)
    460       dw5 |= GEN6_WM_DW5_PS_USE_W;
    461 
    462    if (ff->dual_source_blending)
    463       dw5 |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
    464 
    465    dw6 = io->attr_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT |
    466          io->posoffset << GEN6_WM_DW6_PS_POSOFFSET__SHIFT;
    467 
    468    dw6 |= (info->per_sample_dispatch) ?
    469       GEN6_WM_DW6_MSDISPMODE_PERSAMPLE : GEN6_WM_DW6_MSDISPMODE_PERPIXEL;
    470 
    471    STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 7);
    472    ps->ps[0] = dw2;
    473    ps->ps[1] = dw3;
    474    ps->ps[2] = dw4;
    475    ps->ps[3] = dw5;
    476    ps->ps[4] = dw6;
    477    ps->ps[5] = ff->kernel_offsets[1];
    478    ps->ps[6] = ff->kernel_offsets[2];
    479 
    480    return true;
    481 }
    482 
    483 static bool
    484 ps_set_gen7_3dstate_wm(struct ilo_state_ps *ps,
    485                        const struct ilo_dev *dev,
    486                        const struct ilo_state_ps_info *info,
    487                        const struct pixel_ff *ff)
    488 {
    489    const struct ilo_state_ps_io_info *io = &info->io;
    490    uint32_t dw1, dw2;
    491 
    492    ILO_DEV_ASSERT(dev, 7, 7.5);
    493 
    494    dw1 = io->pscdepth << GEN7_WM_DW1_PSCDEPTH__SHIFT;
    495 
    496    if (ff->dispatch_enable)
    497       dw1 |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
    498    if (ff->kill_pixel)
    499       dw1 |= GEN7_WM_DW1_PS_KILL_PIXEL;
    500 
    501    if (io->use_z)
    502       dw1 |= GEN7_WM_DW1_PS_USE_DEPTH;
    503    if (io->use_w)
    504       dw1 |= GEN7_WM_DW1_PS_USE_W;
    505    if (io->use_coverage_mask)
    506       dw1 |= GEN7_WM_DW1_PS_USE_COVERAGE_MASK;
    507 
    508    dw2 = (info->per_sample_dispatch) ?
    509       GEN7_WM_DW2_MSDISPMODE_PERSAMPLE : GEN7_WM_DW2_MSDISPMODE_PERPIXEL;
    510 
    511    STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 2);
    512    ps->ps[0] = dw1;
    513    ps->ps[1] = dw2;
    514 
    515    return true;
    516 }
    517 
    518 static bool
    519 ps_set_gen7_3DSTATE_PS(struct ilo_state_ps *ps,
    520                        const struct ilo_dev *dev,
    521                        const struct ilo_state_ps_info *info,
    522                        const struct pixel_ff *ff)
    523 {
    524    const struct ilo_state_ps_io_info *io = &info->io;
    525    uint32_t dw2, dw3, dw4, dw5;
    526 
    527    ILO_DEV_ASSERT(dev, 7, 7.5);
    528 
    529    dw2 = ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
    530          ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
    531 
    532    if (false)
    533       dw2 |= GEN6_THREADDISP_FP_MODE_ALT;
    534 
    535    dw3 = ff->per_thread_scratch_space <<
    536       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
    537 
    538    dw4 = io->posoffset << GEN7_PS_DW4_POSOFFSET__SHIFT |
    539          ff->dispatch_modes << GEN7_PS_DW4_DISPATCH_MODE__SHIFT;
    540 
    541    if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
    542       dw4 |= ff->thread_count << GEN75_PS_DW4_MAX_THREADS__SHIFT |
    543              (ff->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
    544    } else {
    545       dw4 |= ff->thread_count << GEN7_PS_DW4_MAX_THREADS__SHIFT;
    546    }
    547 
    548    if (ff->pcb_enable)
    549       dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE;
    550    if (io->attr_count)
    551       dw4 |= GEN7_PS_DW4_ATTR_ENABLE;
    552    if (io->write_omask)
    553       dw4 |= GEN7_PS_DW4_COMPUTE_OMASK;
    554    if (info->rt_clear_enable)
    555       dw4 |= GEN7_PS_DW4_RT_FAST_CLEAR;
    556    if (ff->dual_source_blending)
    557       dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
    558    if (info->rt_resolve_enable)
    559       dw4 |= GEN7_PS_DW4_RT_RESOLVE;
    560    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && ff->has_uav)
    561       dw4 |= GEN75_PS_DW4_ACCESS_UAV;
    562 
    563    dw5 = ff->grf_starts[0] << GEN7_PS_DW5_URB_GRF_START0__SHIFT |
    564          ff->grf_starts[1] << GEN7_PS_DW5_URB_GRF_START1__SHIFT |
    565          ff->grf_starts[2] << GEN7_PS_DW5_URB_GRF_START2__SHIFT;
    566 
    567    STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 8);
    568    ps->ps[2] = dw2;
    569    ps->ps[3] = dw3;
    570    ps->ps[4] = dw4;
    571    ps->ps[5] = dw5;
    572    ps->ps[6] = ff->kernel_offsets[1];
    573    ps->ps[7] = ff->kernel_offsets[2];
    574 
    575    return true;
    576 }
    577 
    578 static bool
    579 ps_set_gen8_3DSTATE_PS(struct ilo_state_ps *ps,
    580                        const struct ilo_dev *dev,
    581                        const struct ilo_state_ps_info *info,
    582                        const struct pixel_ff *ff)
    583 {
    584    const struct ilo_state_ps_io_info *io = &info->io;
    585    uint32_t dw3, dw4, dw6, dw7;
    586 
    587    ILO_DEV_ASSERT(dev, 8, 8);
    588 
    589    /*
    590     * Set VME here for correct computation of LODs and others.  Not sure why
    591     * it is needed now.
    592     */
    593    dw3 = GEN6_THREADDISP_VME |
    594          ff->sampler_count << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT |
    595          ff->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT;
    596 
    597    if (false)
    598       dw3 |= GEN6_THREADDISP_FP_MODE_ALT;
    599 
    600    dw4 = ff->per_thread_scratch_space <<
    601       GEN6_THREADSCRATCH_SPACE_PER_THREAD__SHIFT;
    602 
    603    dw6 = ff->thread_count << GEN8_PS_DW6_MAX_THREADS__SHIFT |
    604          io->posoffset << GEN8_PS_DW6_POSOFFSET__SHIFT |
    605          ff->dispatch_modes << GEN8_PS_DW6_DISPATCH_MODE__SHIFT;
    606 
    607    if (ff->pcb_enable)
    608       dw6 |= GEN8_PS_DW6_PUSH_CONSTANT_ENABLE;
    609 
    610    if (info->rt_clear_enable)
    611       dw6 |= GEN8_PS_DW6_RT_FAST_CLEAR;
    612    if (info->rt_resolve_enable)
    613       dw6 |= GEN8_PS_DW6_RT_RESOLVE;
    614 
    615    dw7 = ff->grf_starts[0] << GEN8_PS_DW7_URB_GRF_START0__SHIFT |
    616          ff->grf_starts[1] << GEN8_PS_DW7_URB_GRF_START1__SHIFT |
    617          ff->grf_starts[2] << GEN8_PS_DW7_URB_GRF_START2__SHIFT;
    618 
    619    STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 6);
    620    ps->ps[0] = dw3;
    621    ps->ps[1] = dw4;
    622    ps->ps[2] = dw6;
    623    ps->ps[3] = dw7;
    624    ps->ps[4] = ff->kernel_offsets[1];
    625    ps->ps[5] = ff->kernel_offsets[2];
    626 
    627    return true;
    628 }
    629 
    630 static bool
    631 ps_set_gen8_3DSTATE_PS_EXTRA(struct ilo_state_ps *ps,
    632                              const struct ilo_dev *dev,
    633                              const struct ilo_state_ps_info *info,
    634                              const struct pixel_ff *ff)
    635 {
    636    const struct ilo_state_ps_io_info *io = &info->io;
    637    uint32_t dw1;
    638 
    639    ILO_DEV_ASSERT(dev, 8, 8);
    640 
    641    dw1 = io->pscdepth << GEN8_PSX_DW1_PSCDEPTH__SHIFT;
    642 
    643    if (info->valid_kernels)
    644       dw1 |= GEN8_PSX_DW1_VALID;
    645    if (!io->has_rt_write)
    646       dw1 |= GEN8_PSX_DW1_UAV_ONLY;
    647    if (io->write_omask)
    648       dw1 |= GEN8_PSX_DW1_COMPUTE_OMASK;
    649    if (io->write_pixel_mask)
    650       dw1 |= GEN8_PSX_DW1_KILL_PIXEL;
    651 
    652    if (io->use_z)
    653       dw1 |= GEN8_PSX_DW1_USE_DEPTH;
    654    if (io->use_w)
    655       dw1 |= GEN8_PSX_DW1_USE_W;
    656    if (io->attr_count)
    657       dw1 |= GEN8_PSX_DW1_ATTR_ENABLE;
    658 
    659    if (info->per_sample_dispatch)
    660       dw1 |= GEN8_PSX_DW1_PER_SAMPLE;
    661    if (ff->has_uav)
    662       dw1 |= GEN8_PSX_DW1_ACCESS_UAV;
    663    if (io->use_coverage_mask)
    664       dw1 |= GEN8_PSX_DW1_USE_COVERAGE_MASK;
    665 
    666    /*
    667     * From the Broadwell PRM, volume 2b, page 151:
    668     *
    669     *     "When this bit (Pixel Shader Valid) clear the rest of this command
    670     *      should also be clear.
    671     */
    672    if (!info->valid_kernels)
    673       dw1 = 0;
    674 
    675    STATIC_ASSERT(ARRAY_SIZE(ps->ps) >= 5);
    676    ps->ps[4] = dw1;
    677 
    678    return true;
    679 }
    680 
    681 bool
    682 ilo_state_ps_init(struct ilo_state_ps *ps,
    683                   const struct ilo_dev *dev,
    684                   const struct ilo_state_ps_info *info)
    685 {
    686    struct pixel_ff ff;
    687    bool ret = true;
    688 
    689    assert(ilo_is_zeroed(ps, sizeof(*ps)));
    690 
    691    ret &= ps_get_gen6_ff(dev, info, &ff);
    692 
    693    if (ilo_dev_gen(dev) >= ILO_GEN(8)) {
    694       ret &= ps_set_gen8_3DSTATE_PS(ps, dev, info, &ff);
    695       ret &= ps_set_gen8_3DSTATE_PS_EXTRA(ps, dev, info, &ff);
    696    } else if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
    697       ret &= ps_set_gen7_3dstate_wm(ps, dev, info, &ff);
    698       ret &= ps_set_gen7_3DSTATE_PS(ps, dev, info, &ff);
    699    } else {
    700       ret &= ps_set_gen6_3dstate_wm(ps, dev, info, &ff);
    701    }
    702 
    703    ps->scratch_size = ff.per_thread_scratch_size * ff.thread_count;
    704    /* save conditions */
    705    ps->conds = ff.conds;
    706 
    707    assert(ret);
    708 
    709    return ret;
    710 }
    711 
    712 bool
    713 ilo_state_ps_init_disabled(struct ilo_state_ps *ps,
    714                            const struct ilo_dev *dev)
    715 {
    716    struct ilo_state_ps_info info;
    717 
    718    memset(&info, 0, sizeof(info));
    719 
    720    return ilo_state_ps_init(ps, dev, &info);
    721 }
    722 
    723 bool
    724 ilo_state_ps_set_params(struct ilo_state_ps *ps,
    725                         const struct ilo_dev *dev,
    726                         const struct ilo_state_ps_params_info *params)
    727 {
    728    ILO_DEV_ASSERT(dev, 6, 8);
    729 
    730    /* modify sample mask */
    731    if (ilo_dev_gen(dev) == ILO_GEN(7.5)) {
    732       ps->ps[4] = (ps->ps[4] & ~GEN75_PS_DW4_SAMPLE_MASK__MASK) |
    733          (params->sample_mask & 0xff) << GEN75_PS_DW4_SAMPLE_MASK__SHIFT;
    734    }
    735 
    736    /* modify dispatch enable, pixel kill, and dual source blending */
    737    if (ilo_dev_gen(dev) < ILO_GEN(8)) {
    738       if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
    739          if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
    740             ps->ps[0] |= GEN7_WM_DW1_PS_DISPATCH_ENABLE;
    741          else
    742             ps->ps[0] &= ~GEN7_WM_DW1_PS_DISPATCH_ENABLE;
    743 
    744          if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
    745             ps->ps[0] |= GEN7_WM_DW1_PS_KILL_PIXEL;
    746          else
    747             ps->ps[0] &= ~GEN7_WM_DW1_PS_KILL_PIXEL;
    748 
    749          if (params->dual_source_blending)
    750             ps->ps[4] |= GEN7_PS_DW4_DUAL_SOURCE_BLEND;
    751          else
    752             ps->ps[4] &= ~GEN7_PS_DW4_DUAL_SOURCE_BLEND;
    753       } else {
    754          if (ps_params_get_gen6_dispatch_enable(dev, params, &ps->conds))
    755             ps->ps[3] |= GEN6_WM_DW5_PS_DISPATCH_ENABLE;
    756          else
    757             ps->ps[3] &= ~GEN6_WM_DW5_PS_DISPATCH_ENABLE;
    758 
    759          if (ps_params_get_gen6_kill_pixel(dev, params, &ps->conds))
    760             ps->ps[3] |= GEN6_WM_DW5_PS_KILL_PIXEL;
    761          else
    762             ps->ps[3] &= ~GEN6_WM_DW5_PS_KILL_PIXEL;
    763 
    764          if (params->dual_source_blending)
    765             ps->ps[3] |= GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
    766          else
    767             ps->ps[3] &= ~GEN6_WM_DW5_PS_DUAL_SOURCE_BLEND;
    768       }
    769    }
    770 
    771    return true;
    772 }
    773