/*
 * Copyright 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef BRW_VEC4_H
#define BRW_VEC4_H

#include "brw_shader.h"
#include "brw_program.h"

/* The vec4 IR definitions are C++-only; this header is also consumed from C
 * translation units, which see just the extern "C" entry point below.
 */
#ifdef __cplusplus
#include "brw_ir_vec4.h"
#endif

#include "compiler/glsl/ir.h"
#include "compiler/nir/nir.h"


#ifdef __cplusplus
extern "C" {
#endif

/**
 * Generate native assembly for a vec4 (VUE) program.
 *
 * \param compiler          compiler context shared across compilations
 * \param log_data          opaque cookie passed back to the compiler's logger
 * \param mem_ctx           ralloc-style allocation context for the result
 *                          (presumably owns the returned buffer — confirm in
 *                          the implementation)
 * \param nir               the NIR shader being compiled
 * \param prog_data         per-program metadata, filled in during generation
 * \param cfg               control-flow graph of the lowered vec4 IR to emit
 * \param out_assembly_size on return, size of the generated assembly
 *
 * \return pointer to the generated machine code.
 */
const unsigned *
brw_vec4_generate_assembly(const struct brw_compiler *compiler,
                           void *log_data,
                           void *mem_ctx,
                           const nir_shader *nir,
                           struct brw_vue_prog_data *prog_data,
                           const struct cfg_t *cfg,
                           unsigned *out_assembly_size);

#ifdef __cplusplus
} /* extern "C" */

namespace brw {

class vec4_live_variables;

/**
 * The vertex shader front-end.
 *
 * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
 * fixed-function) into VS IR.
 */
class vec4_visitor : public backend_shader
{
public:
   vec4_visitor(const struct brw_compiler *compiler,
                void *log_data,
                const struct brw_sampler_prog_key_data *key,
                struct brw_vue_prog_data *prog_data,
                const nir_shader *shader,
                void *mem_ctx,
                bool no_spills,
                int shader_time_index);
   virtual ~vec4_visitor();

   /* Helpers returning a null-register destination retyped to F/DF/D/UD,
    * for instructions whose result value is to be discarded.
    */
   dst_reg dst_null_f()
   {
      return dst_reg(brw_null_reg());
   }

   dst_reg dst_null_df()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_DF));
   }

   dst_reg dst_null_d()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
   }

   dst_reg dst_null_ud()
   {
      return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
   }

   /* Compilation inputs: the sampler-related program key and the output
    * program-data block that this visitor fills in.
    */
   const struct brw_sampler_prog_key_data * const key_tex;
   struct brw_vue_prog_data * const prog_data;

   /* Failure state; presumably set via fail() below — message plus flag. */
   char *fail_msg;
   bool failed;

   /**
    * GLSL IR currently being processed, which is associated with our
    * driver IR instructions for debugging purposes.
    */
   const void *base_ir;
   const char *current_annotation;

   /* Register-allocation and liveness bookkeeping. */
   int first_non_payload_grf;
   unsigned int max_grf;
   int *virtual_grf_start;
   int *virtual_grf_end;
   brw::vec4_live_variables *live_intervals;
   dst_reg userplane[MAX_CLIP_PLANES];

   bool need_all_constants_in_pull_buffer;

   /* Regs for vertex results. Generated at ir_variable visiting time
    * for the ir->location's used.
    */
   dst_reg output_reg[VARYING_SLOT_TESS_MAX][4];
   unsigned output_num_components[VARYING_SLOT_TESS_MAX][4];
   const char *output_reg_annotation[VARYING_SLOT_TESS_MAX];
   int uniforms;

   src_reg shader_start_time;

   /* Main compile entry point; fail() records an error (see fail_msg/failed
    * above) — printf-style message.
    */
   bool run();
   void fail(const char *msg, ...);

   int setup_uniforms(int payload_reg);

   /* Register allocation, spilling, and uniform/scratch plumbing. */
   bool reg_allocate_trivial();
   bool reg_allocate();
   void evaluate_spill_costs(float *spill_costs, bool *no_spill);
   int choose_spill_reg(struct ra_graph *g);
   void spill_reg(int spill_reg);
   void move_grf_array_access_to_scratch();
   void move_uniform_array_access_to_pull_constants();
   void move_push_constants_to_pull_constants();
   void split_uniform_registers();
   void pack_uniform_registers();
   void calculate_live_intervals();
   void invalidate_live_intervals();
   void split_virtual_grfs();

   /* Optimization passes over the vec4 IR; the bool-returning passes
    * presumably report whether they made progress (standard convention in
    * this compiler — confirm against the definitions).
    */
   bool opt_vector_float();
   bool opt_reduce_swizzle();
   bool dead_code_eliminate();
   int var_range_start(unsigned v, unsigned n) const;
   int var_range_end(unsigned v, unsigned n) const;
   bool virtual_grf_interferes(int a, int b);
   bool opt_cmod_propagation();
   bool opt_copy_propagation(bool do_constant_prop = true);
   bool opt_cse_local(bblock_t *block);
   bool opt_cse();
   bool opt_algebraic();
   bool opt_register_coalesce();
   bool eliminate_find_live_channel();
   bool is_dep_ctrl_unsafe(const vec4_instruction *inst);
   void opt_set_dependency_control();
   void opt_schedule_instructions();
   void convert_to_hw_regs();

   /* 64-bit (double) lowering support. */
   bool is_supported_64bit_region(vec4_instruction *inst, unsigned arg);
   bool lower_simd_width();
   bool scalarize_df();
   bool lower_64bit_mad_to_mul_add();
   void apply_logical_swizzle(struct brw_reg *hw_reg,
                              vec4_instruction *inst, int arg);

   /* Instruction emission: append an instruction (overloads build one from
    * an opcode, destination, and up to three sources).
    */
   vec4_instruction *emit(vec4_instruction *inst);

   vec4_instruction *emit(enum opcode opcode);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1);
   vec4_instruction *emit(enum opcode opcode, const dst_reg &dst,
                          const src_reg &src0, const src_reg &src1,
                          const src_reg &src2);

   vec4_instruction *emit_before(bblock_t *block,
                                 vec4_instruction *inst,
                                 vec4_instruction *new_inst);

   /* Convenience emitters: EMITn declares an emitter taking a destination
    * and n sources, named after its opcode.
    */
#define EMIT1(op) vec4_instruction *op(const dst_reg &, const src_reg &);
#define EMIT2(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &);
#define EMIT3(op) vec4_instruction *op(const dst_reg &, const src_reg &, const src_reg &, const src_reg &);
   EMIT1(MOV)
   EMIT1(NOT)
   EMIT1(RNDD)
   EMIT1(RNDE)
   EMIT1(RNDZ)
   EMIT1(FRC)
   EMIT1(F32TO16)
   EMIT1(F16TO32)
   EMIT2(ADD)
   EMIT2(MUL)
   EMIT2(MACH)
   EMIT2(MAC)
   EMIT2(AND)
   EMIT2(OR)
   EMIT2(XOR)
   EMIT2(DP3)
   EMIT2(DP4)
   EMIT2(DPH)
   EMIT2(SHL)
   EMIT2(SHR)
   EMIT2(ASR)
   vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
                         enum brw_conditional_mod condition);
   vec4_instruction *IF(src_reg src0, src_reg src1,
                        enum brw_conditional_mod condition);
   vec4_instruction *IF(enum brw_predicate predicate);
   EMIT1(SCRATCH_READ)
   EMIT2(SCRATCH_WRITE)
   EMIT3(LRP)
   EMIT1(BFREV)
   EMIT3(BFE)
   EMIT2(BFI1)
   EMIT3(BFI2)
   EMIT1(FBH)
   EMIT1(FBL)
   EMIT1(CBIT)
   EMIT3(MAD)
   EMIT2(ADDC)
   EMIT2(SUBB)
   EMIT1(DIM)

#undef EMIT1
#undef EMIT2
#undef EMIT3

   int implied_mrf_writes(vec4_instruction *inst);

   vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
                                 src_reg src0, src_reg src1);

   vec4_instruction *emit_lrp(const dst_reg &dst, const src_reg &x,
                              const src_reg &y, const src_reg &a);

   /**
    * Copy any live channel from \p src to the first channel of the
    * result.
    */
   src_reg emit_uniformize(const src_reg &src);

   /* Source legalization helpers. */
   src_reg fix_3src_operand(const src_reg &src);
   src_reg resolve_source_modifiers(const src_reg &src);

   vec4_instruction *emit_math(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
                               const src_reg &src1 = src_reg());

   src_reg fix_math_operand(const src_reg &src);

   /* GLSL packing/unpacking built-in lowering. */
   void emit_pack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
   void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_unpack_snorm_4x8(const dst_reg &dst, src_reg src0);
   void emit_pack_unorm_4x8(const dst_reg &dst, const src_reg &src0);
   void emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0);

   /* Texturing. */
   void emit_texture(ir_texture_opcode op,
                     dst_reg dest,
                     const glsl_type *dest_type,
                     src_reg coordinate,
                     int coord_components,
                     src_reg shadow_comparator,
                     src_reg lod, src_reg lod2,
                     src_reg sample_index,
                     uint32_t constant_offset,
                     src_reg offset_value,
                     src_reg mcs,
                     uint32_t surface, src_reg surface_reg,
                     src_reg sampler_reg);

   src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate,
                          src_reg surface);
   void emit_gen6_gather_wa(uint8_t wa, dst_reg dst);

   /* URB output emission. */
   void emit_ndc_computation();
   void emit_psiz_and_flags(dst_reg reg);
   vec4_instruction *emit_generic_urb_slot(dst_reg reg, int varying, int comp);
   virtual void emit_urb_slot(dst_reg reg, int varying);

   /* INTEL_DEBUG shader-time instrumentation. */
   void emit_shader_time_begin();
   void emit_shader_time_end();
   void emit_shader_time_write(int shader_time_subindex, src_reg value);

   /* Scratch and pull-constant access. */
   src_reg get_scratch_offset(bblock_t *block, vec4_instruction *inst,
                              src_reg *reladdr, int reg_offset);
   void emit_scratch_read(bblock_t *block, vec4_instruction *inst,
                          dst_reg dst,
                          src_reg orig_src,
                          int base_offset);
   void emit_scratch_write(bblock_t *block, vec4_instruction *inst,
                           int base_offset);
   void emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
                                dst_reg dst,
                                src_reg orig_src,
                                int base_offset,
                                src_reg indirect);
   void emit_pull_constant_load_reg(dst_reg dst,
                                    src_reg surf_index,
                                    src_reg offset,
                                    bblock_t *before_block,
                                    vec4_instruction *before_inst);
   src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
                                vec4_instruction *inst, src_reg src);

   void resolve_ud_negate(src_reg *reg);

   bool lower_minmax();

   src_reg get_timestamp();

   /* Debug dumping of single instructions (to stderr or a given FILE). */
   void dump_instruction(backend_instruction *inst);
   void dump_instruction(backend_instruction *inst, FILE *file);

   bool is_high_sampler(src_reg sampler);

   bool optimize_predicate(nir_alu_instr *instr, enum brw_predicate *predicate);

   /* Double <-> single precision conversion helpers. */
   void emit_conversion_from_double(dst_reg dst, src_reg src, bool saturate,
                                    brw_reg_type single_type);
   void emit_conversion_to_double(dst_reg dst, src_reg src, bool saturate,
                                  brw_reg_type single_type);

   src_reg setup_imm_df(double v);

   vec4_instruction *shuffle_64bit_data(dst_reg dst, src_reg src,
                                        bool for_write,
                                        bblock_t *block = NULL,
                                        vec4_instruction *ref = NULL);

   /* NIR -> vec4 IR translation; virtual so stage-specific subclasses can
    * override individual pieces.
    */
   virtual void emit_nir_code();
   virtual void nir_setup_uniforms();
   virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_setup_system_values();
   virtual void nir_emit_impl(nir_function_impl *impl);
   virtual void nir_emit_cf_list(exec_list *list);
   virtual void nir_emit_if(nir_if *if_stmt);
   virtual void nir_emit_loop(nir_loop *loop);
   virtual void nir_emit_block(nir_block *block);
   virtual void nir_emit_instr(nir_instr *instr);
   virtual void nir_emit_load_const(nir_load_const_instr *instr);
   virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
   virtual void nir_emit_alu(nir_alu_instr *instr);
   virtual void nir_emit_jump(nir_jump_instr *instr);
   virtual void nir_emit_texture(nir_tex_instr *instr);
   virtual void nir_emit_undef(nir_ssa_undef_instr *instr);
   virtual void nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr);

   /* Mapping of NIR dests/srcs onto vec4 IR registers. */
   dst_reg get_nir_dest(const nir_dest &dest, enum brw_reg_type type);
   dst_reg get_nir_dest(const nir_dest &dest, nir_alu_type type);
   dst_reg get_nir_dest(const nir_dest &dest);
   src_reg get_nir_src(const nir_src &src, enum brw_reg_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src, nir_alu_type type,
                       unsigned num_components = 4);
   src_reg get_nir_src(const nir_src &src,
                       unsigned num_components = 4);
   src_reg get_indirect_offset(nir_intrinsic_instr *instr);

   virtual dst_reg *make_reg_for_system_value(int location) = 0;

   /* Per-NIR-object register arrays, indexed during translation. */
   dst_reg *nir_locals;
   dst_reg *nir_ssa_values;
   dst_reg *nir_system_values;

protected:
   void emit_vertex();
   void lower_attributes_to_hw_regs(const int *attribute_map,
                                    bool interleaved);
   void setup_payload_interference(struct ra_graph *g, int first_payload_node,
                                   int reg_node_count);

   /* Stage-specific hooks implemented by the VS/GS/tessellation subclasses. */
   virtual void setup_payload() = 0;
   virtual void emit_prolog() = 0;
   virtual void emit_thread_end() = 0;
   virtual void emit_urb_write_header(int mrf) = 0;
   virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
   virtual void gs_emit_vertex(int stream_id);
   virtual void gs_end_primitive();

private:
   /**
    * If true, then register allocation should fail instead of spilling.
    */
   const bool no_spills;

   int shader_time_index;

   unsigned last_scratch; /**< measured in 32-byte (register size) units */
};

} /* namespace brw */
#endif /* __cplusplus */

#endif /* BRW_VEC4_H */