Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright © 2016 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "compiler/nir/nir_builder.h"
     25 #include "brw_nir.h"
     26 
     27 /**
     28  * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8).
     29  *
     30  * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the
     31  * definition of the patch header layouts):
     32  *
     33  *    "HW Bug: The Tessellation stage will incorrectly add domain points
     34  *     along patch edges under the following conditions, which may result
     35  *     in conformance failures and/or cracking artifacts:
     36  *
     37  *       * QUAD domain
     38  *       * INTEGER partitioning
     39  *       * All three TessFactors in a given U or V direction (e.g., V
     40  *         direction: UEQ0, InsideV, UEQ1) are all exactly 1.0
     41  *       * All three TessFactors in the other direction are > 1.0 and all
     42  *         round up to the same integer value (e.g, U direction:
     43  *         VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4)
     44  *
     45  *     The suggested workaround (to be implemented as part of the postamble
     46  *     to the HS shader in the HS kernel) is:
     47  *
     48  *     if (
     49  *        (TF[UEQ0] > 1.0) ||
     50  *        (TF[VEQ0] > 1.0) ||
     51  *        (TF[UEQ1] > 1.0) ||
     52  *        (TF[VEQ1] > 1.0) ||
     53  *        (TF[INSIDE_U] > 1.0) ||
     54  *        (TF[INSIDE_V] > 1.0) )
     55  *     {
     56  *        TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U];
     57  *        TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V];
     58  *     }"
     59  *
     60  * There's a subtlety here.  Intel internal HSD-ES bug 1208668495 notes
     61  * that the above workaround fails to fix certain GL/ES CTS tests which
     62  * have inside tessellation factors of -1.0.  This can be explained by
     63  * a quote from the ARB_tessellation_shader specification:
     64  *
     65  *    "If "equal_spacing" is used, the floating-point tessellation level is
     66  *     first clamped to the range [1,<max>], where <max> is implementation-
     67  *     dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)."
     68  *
     69  * In other words, the actual inner tessellation factor used is
     70  * clamp(TF[INSIDE_*], 1.0, 64.0).  So we want to compare the clamped
     71  * value against 1.0.  To accomplish this, we change the comparison from
     72  * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0).
     73  */
     74 
     75 static inline nir_ssa_def *
     76 load_output(nir_builder *b, int num_components, int offset, int component)
     77 {
     78    nir_intrinsic_instr *load =
     79       nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output);
     80    nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL);
     81    load->num_components = num_components;
     82    load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
     83    nir_intrinsic_set_base(load, offset);
     84    nir_intrinsic_set_component(load, component);
     85 
     86    nir_builder_instr_insert(b, &load->instr);
     87 
     88    return &load->dest.ssa;
     89 }
     90 
     91 static void
     92 emit_quads_workaround(nir_builder *b, nir_block *block)
     93 {
     94    b->cursor = nir_after_block_before_jump(block);
     95 
     96    nir_ssa_def *inner = load_output(b, 2, 0, 2);
     97    nir_ssa_def *outer = load_output(b, 4, 1, 0);
     98 
     99    nir_ssa_def *any_greater_than_1 =
    100        nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)),
    101                   nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner)));
    102 
    103    nir_if *if_stmt = nir_if_create(b->shader);
    104    if_stmt->condition = nir_src_for_ssa(any_greater_than_1);
    105    nir_builder_cf_insert(b, &if_stmt->cf_node);
    106 
    107    /* Fill out the new then-block */
    108    b->cursor = nir_after_cf_list(&if_stmt->then_list);
    109 
    110    inner = nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner),
    111                         nir_imm_float(b, 2.0f), inner);
    112 
    113    nir_intrinsic_instr *store =
    114       nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
    115    store->num_components = 2;
    116    nir_intrinsic_set_write_mask(store, WRITEMASK_XY);
    117    nir_intrinsic_set_component(store, 2);
    118    store->src[0] = nir_src_for_ssa(inner);
    119    store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
    120    nir_builder_instr_insert(b, &store->instr);
    121 }
    122 
    123 void
    124 brw_nir_apply_tcs_quads_workaround(nir_shader *nir)
    125 {
    126    assert(nir->stage == MESA_SHADER_TESS_CTRL);
    127 
    128    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
    129 
    130    nir_builder b;
    131    nir_builder_init(&b, impl);
    132 
    133    /* emit_quads_workaround() inserts an if statement into each block,
    134     * which splits it in two.  This changes the set of predecessors of
    135     * the end block.  We want to process the original set, so to be safe,
    136     * save it off to an array first.
    137     */
    138    const unsigned num_end_preds = impl->end_block->predecessors->entries;
    139    nir_block *end_preds[num_end_preds];
    140    unsigned i = 0;
    141    struct set_entry *entry;
    142 
    143    set_foreach(impl->end_block->predecessors, entry) {
    144       end_preds[i++] = (nir_block *) entry->key;
    145    }
    146 
    147    for (i = 0; i < num_end_preds; i++) {
    148       emit_quads_workaround(&b, end_preds[i]);
    149    }
    150 
    151    nir_metadata_preserve(impl, 0);
    152 }
    153