/*
 * Copyright 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_vec4_tes.cpp
 *
 * Tessellation evaluation shader specific code derived from the vec4_visitor class.
28 */ 29 30 #include "brw_vec4_tes.h" 31 #include "brw_cfg.h" 32 33 namespace brw { 34 35 vec4_tes_visitor::vec4_tes_visitor(const struct brw_compiler *compiler, 36 void *log_data, 37 const struct brw_tes_prog_key *key, 38 struct brw_tes_prog_data *prog_data, 39 const nir_shader *shader, 40 void *mem_ctx, 41 int shader_time_index) 42 : vec4_visitor(compiler, log_data, &key->tex, &prog_data->base, 43 shader, mem_ctx, false, shader_time_index) 44 { 45 } 46 47 48 dst_reg * 49 vec4_tes_visitor::make_reg_for_system_value(int location) 50 { 51 return NULL; 52 } 53 54 void 55 vec4_tes_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) 56 { 57 switch (instr->intrinsic) { 58 case nir_intrinsic_load_tess_level_outer: 59 case nir_intrinsic_load_tess_level_inner: 60 break; 61 default: 62 vec4_visitor::nir_setup_system_value_intrinsic(instr); 63 } 64 } 65 66 67 void 68 vec4_tes_visitor::setup_payload() 69 { 70 int reg = 0; 71 72 /* The payload always contains important data in r0 and r1, which contains 73 * the URB handles that are passed on to the URB write at the end 74 * of the thread. 75 */ 76 reg += 2; 77 78 reg = setup_uniforms(reg); 79 80 foreach_block_and_inst(block, vec4_instruction, inst, cfg) { 81 for (int i = 0; i < 3; i++) { 82 if (inst->src[i].file != ATTR) 83 continue; 84 85 bool is_64bit = type_sz(inst->src[i].type) == 8; 86 87 unsigned slot = inst->src[i].nr + inst->src[i].offset / 16; 88 struct brw_reg grf = brw_vec4_grf(reg + slot / 2, 4 * (slot % 2)); 89 grf = stride(grf, 0, is_64bit ? 2 : 4, 1); 90 grf.swizzle = inst->src[i].swizzle; 91 grf.type = inst->src[i].type; 92 grf.abs = inst->src[i].abs; 93 grf.negate = inst->src[i].negate; 94 95 /* For 64-bit attributes we can end up with components XY in the 96 * second half of a register and components ZW in the first half 97 * of the next. Fix it up here. 98 */ 99 if (is_64bit && grf.subnr > 0) { 100 /* We can't do swizzles that mix XY and ZW channels in this case. 
101 * Such cases should have been handled by the scalarization pass. 102 */ 103 assert((brw_mask_for_swizzle(grf.swizzle) & 0x3) ^ 104 (brw_mask_for_swizzle(grf.swizzle) & 0xc)); 105 if (brw_mask_for_swizzle(grf.swizzle) & 0xc) { 106 grf.subnr = 0; 107 grf.nr++; 108 grf.swizzle -= BRW_SWIZZLE_ZZZZ; 109 } 110 } 111 112 inst->src[i] = grf; 113 } 114 } 115 116 reg += 8 * prog_data->urb_read_length; 117 118 this->first_non_payload_grf = reg; 119 } 120 121 122 void 123 vec4_tes_visitor::emit_prolog() 124 { 125 input_read_header = src_reg(this, glsl_type::uvec4_type); 126 emit(TES_OPCODE_CREATE_INPUT_READ_HEADER, dst_reg(input_read_header)); 127 128 this->current_annotation = NULL; 129 } 130 131 132 void 133 vec4_tes_visitor::emit_urb_write_header(int mrf) 134 { 135 /* No need to do anything for DS; an implied write to this MRF will be 136 * performed by VS_OPCODE_URB_WRITE. 137 */ 138 (void) mrf; 139 } 140 141 142 vec4_instruction * 143 vec4_tes_visitor::emit_urb_write_opcode(bool complete) 144 { 145 /* For DS, the URB writes end the thread. */ 146 if (complete) { 147 if (INTEL_DEBUG & DEBUG_SHADER_TIME) 148 emit_shader_time_end(); 149 } 150 151 vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE); 152 inst->urb_write_flags = complete ? 153 BRW_URB_WRITE_EOT_COMPLETE : BRW_URB_WRITE_NO_FLAGS; 154 155 return inst; 156 } 157 158 void 159 vec4_tes_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) 160 { 161 const struct brw_tes_prog_data *tes_prog_data = 162 (const struct brw_tes_prog_data *) prog_data; 163 164 switch (instr->intrinsic) { 165 case nir_intrinsic_load_tess_coord: 166 /* gl_TessCoord is part of the payload in g1 channels 0-2 and 4-6. 
*/ 167 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 168 src_reg(brw_vec8_grf(1, 0)))); 169 break; 170 case nir_intrinsic_load_tess_level_outer: 171 if (tes_prog_data->domain == BRW_TESS_DOMAIN_ISOLINE) { 172 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 173 swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), 174 BRW_SWIZZLE_ZWZW))); 175 } else { 176 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 177 swizzle(src_reg(ATTR, 1, glsl_type::vec4_type), 178 BRW_SWIZZLE_WZYX))); 179 } 180 break; 181 case nir_intrinsic_load_tess_level_inner: 182 if (tes_prog_data->domain == BRW_TESS_DOMAIN_QUAD) { 183 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 184 swizzle(src_reg(ATTR, 0, glsl_type::vec4_type), 185 BRW_SWIZZLE_WZYX))); 186 } else { 187 emit(MOV(get_nir_dest(instr->dest, BRW_REGISTER_TYPE_F), 188 src_reg(ATTR, 1, glsl_type::float_type))); 189 } 190 break; 191 case nir_intrinsic_load_primitive_id: 192 emit(TES_OPCODE_GET_PRIMITIVE_ID, 193 get_nir_dest(instr->dest, BRW_REGISTER_TYPE_UD)); 194 break; 195 196 case nir_intrinsic_load_input: 197 case nir_intrinsic_load_per_vertex_input: { 198 src_reg indirect_offset = get_indirect_offset(instr); 199 unsigned imm_offset = instr->const_index[0]; 200 src_reg header = input_read_header; 201 bool is_64bit = nir_dest_bit_size(instr->dest) == 64; 202 unsigned first_component = nir_intrinsic_component(instr); 203 if (is_64bit) 204 first_component /= 2; 205 206 if (indirect_offset.file != BAD_FILE) { 207 header = src_reg(this, glsl_type::uvec4_type); 208 emit(TES_OPCODE_ADD_INDIRECT_URB_OFFSET, dst_reg(header), 209 input_read_header, indirect_offset); 210 } else { 211 /* Arbitrarily only push up to 24 vec4 slots worth of data, 212 * which is 12 registers (since each holds 2 vec4 slots). 213 */ 214 const unsigned max_push_slots = 24; 215 if (imm_offset < max_push_slots) { 216 const glsl_type *src_glsl_type = 217 is_64bit ? 
glsl_type::dvec4_type : glsl_type::ivec4_type; 218 src_reg src = src_reg(ATTR, imm_offset, src_glsl_type); 219 src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 220 221 const brw_reg_type dst_reg_type = 222 is_64bit ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_D; 223 emit(MOV(get_nir_dest(instr->dest, dst_reg_type), src)); 224 225 prog_data->urb_read_length = 226 MAX2(prog_data->urb_read_length, 227 DIV_ROUND_UP(imm_offset + (is_64bit ? 2 : 1), 2)); 228 break; 229 } 230 } 231 232 if (!is_64bit) { 233 dst_reg temp(this, glsl_type::ivec4_type); 234 vec4_instruction *read = 235 emit(VEC4_OPCODE_URB_READ, temp, src_reg(header)); 236 read->offset = imm_offset; 237 read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; 238 239 src_reg src = src_reg(temp); 240 src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 241 242 /* Copy to target. We might end up with some funky writemasks landing 243 * in here, but we really don't want them in the above pseudo-ops. 244 */ 245 dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); 246 dst.writemask = brw_writemask_for_size(instr->num_components); 247 emit(MOV(dst, src)); 248 } else { 249 /* For 64-bit we need to load twice as many 32-bit components, and for 250 * dvec3/4 we need to emit 2 URB Read messages 251 */ 252 dst_reg temp(this, glsl_type::dvec4_type); 253 dst_reg temp_d = retype(temp, BRW_REGISTER_TYPE_D); 254 255 vec4_instruction *read = 256 emit(VEC4_OPCODE_URB_READ, temp_d, src_reg(header)); 257 read->offset = imm_offset; 258 read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; 259 260 if (instr->num_components > 2) { 261 read = emit(VEC4_OPCODE_URB_READ, byte_offset(temp_d, REG_SIZE), 262 src_reg(header)); 263 read->offset = imm_offset + 1; 264 read->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET; 265 } 266 267 src_reg temp_as_src = src_reg(temp); 268 temp_as_src.swizzle = BRW_SWZ_COMP_INPUT(first_component); 269 270 dst_reg shuffled(this, glsl_type::dvec4_type); 271 shuffle_64bit_data(shuffled, temp_as_src, 
false); 272 273 dst_reg dst = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_DF); 274 dst.writemask = brw_writemask_for_size(instr->num_components); 275 emit(MOV(dst, src_reg(shuffled))); 276 } 277 break; 278 } 279 default: 280 vec4_visitor::nir_emit_intrinsic(instr); 281 } 282 } 283 284 285 void 286 vec4_tes_visitor::emit_thread_end() 287 { 288 /* For DS, we always end the thread by emitting a single vertex. 289 * emit_urb_write_opcode() will take care of setting the eot flag on the 290 * SEND instruction. 291 */ 292 emit_vertex(); 293 } 294 295 } /* namespace brw */ 296