1 /* 2 * Copyright 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "compiler/nir/nir_builder.h" 25 #include "brw_nir.h" 26 27 /** 28 * Implements the WaPreventHSTessLevelsInterference workaround (for Gen7-8). 29 * 30 * From the Broadwell PRM, Volume 7 (3D-Media-GPGPU), Page 494 (below the 31 * definition of the patch header layouts): 32 * 33 * "HW Bug: The Tessellation stage will incorrectly add domain points 34 * along patch edges under the following conditions, which may result 35 * in conformance failures and/or cracking artifacts: 36 * 37 * * QUAD domain 38 * * INTEGER partitioning 39 * * All three TessFactors in a given U or V direction (e.g., V 40 * direction: UEQ0, InsideV, UEQ1) are all exactly 1.0 41 * * All three TessFactors in the other direction are > 1.0 and all 42 * round up to the same integer value (e.g, U direction: 43 * VEQ0 = 3.1, InsideU = 3.7, VEQ1 = 3.4) 44 * 45 * The suggested workaround (to be implemented as part of the postamble 46 * to the HS shader in the HS kernel) is: 47 * 48 * if ( 49 * (TF[UEQ0] > 1.0) || 50 * (TF[VEQ0] > 1.0) || 51 * (TF[UEQ1] > 1.0) || 52 * (TF[VEQ1] > 1.0) || 53 * (TF[INSIDE_U] > 1.0) || 54 * (TF[INSIDE_V] > 1.0) ) 55 * { 56 * TF[INSIDE_U] = (TF[INSIDE_U] == 1.0) ? 2.0 : TF[INSIDE_U]; 57 * TF[INSIDE_V] = (TF[INSIDE_V] == 1.0) ? 2.0 : TF[INSIDE_V]; 58 * }" 59 * 60 * There's a subtlety here. Intel internal HSD-ES bug 1208668495 notes 61 * that the above workaround fails to fix certain GL/ES CTS tests which 62 * have inside tessellation factors of -1.0. This can be explained by 63 * a quote from the ARB_tessellation_shader specification: 64 * 65 * "If "equal_spacing" is used, the floating-point tessellation level is 66 * first clamped to the range [1,<max>], where <max> is implementation- 67 * dependent maximum tessellation level (MAX_TESS_GEN_LEVEL)." 68 * 69 * In other words, the actual inner tessellation factor used is 70 * clamp(TF[INSIDE_*], 1.0, 64.0). So we want to compare the clamped 71 * value against 1.0. To accomplish this, we change the comparison from 72 * (TF[INSIDE_*] == 1.0) to (TF[INSIDE_*] <= 1.0). 73 */ 74 75 static inline nir_ssa_def * 76 load_output(nir_builder *b, int num_components, int offset, int component) 77 { 78 nir_intrinsic_instr *load = 79 nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_output); 80 nir_ssa_dest_init(&load->instr, &load->dest, num_components, 32, NULL); 81 load->num_components = num_components; 82 load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); 83 nir_intrinsic_set_base(load, offset); 84 nir_intrinsic_set_component(load, component); 85 86 nir_builder_instr_insert(b, &load->instr); 87 88 return &load->dest.ssa; 89 } 90 91 static void 92 emit_quads_workaround(nir_builder *b, nir_block *block) 93 { 94 b->cursor = nir_after_block_before_jump(block); 95 96 nir_ssa_def *inner = load_output(b, 2, 0, 2); 97 nir_ssa_def *outer = load_output(b, 4, 1, 0); 98 99 nir_ssa_def *any_greater_than_1 = 100 nir_ior(b, nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), outer)), 101 nir_bany(b, nir_flt(b, nir_imm_float(b, 1.0f), inner))); 102 103 nir_if *if_stmt = nir_if_create(b->shader); 104 if_stmt->condition = nir_src_for_ssa(any_greater_than_1); 105 nir_builder_cf_insert(b, &if_stmt->cf_node); 106 107 /* Fill out the new then-block */ 108 b->cursor = nir_after_cf_list(&if_stmt->then_list); 109 110 inner = nir_bcsel(b, nir_fge(b, nir_imm_float(b, 1.0f), inner), 111 nir_imm_float(b, 2.0f), inner); 112 113 nir_intrinsic_instr *store = 114 nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); 115 store->num_components = 2; 116 nir_intrinsic_set_write_mask(store, WRITEMASK_XY); 117 nir_intrinsic_set_component(store, 2); 118 store->src[0] = nir_src_for_ssa(inner); 119 store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); 120 nir_builder_instr_insert(b, &store->instr); 121 } 122 123 void 124 brw_nir_apply_tcs_quads_workaround(nir_shader *nir) 125 { 126 assert(nir->stage == MESA_SHADER_TESS_CTRL); 127 128 nir_function_impl *impl = nir_shader_get_entrypoint(nir); 129 130 nir_builder b; 131 nir_builder_init(&b, impl); 132 133 /* emit_quads_workaround() inserts an if statement into each block, 134 * which splits it in two. This changes the set of predecessors of 135 * the end block. We want to process the original set, so to be safe, 136 * save it off to an array first. 137 */ 138 const unsigned num_end_preds = impl->end_block->predecessors->entries; 139 nir_block *end_preds[num_end_preds]; 140 unsigned i = 0; 141 struct set_entry *entry; 142 143 set_foreach(impl->end_block->predecessors, entry) { 144 end_preds[i++] = (nir_block *) entry->key; 145 } 146 147 for (i = 0; i < num_end_preds; i++) { 148 emit_quads_workaround(&b, end_preds[i]); 149 } 150 151 nir_metadata_preserve(impl, 0); 152 } 153