1 /* 2 * Copyright 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "brw_nir.h" 25 #include "brw_shader.h" 26 #include "compiler/glsl_types.h" 27 #include "compiler/nir/nir_builder.h" 28 29 static bool 30 is_input(nir_intrinsic_instr *intrin) 31 { 32 return intrin->intrinsic == nir_intrinsic_load_input || 33 intrin->intrinsic == nir_intrinsic_load_per_vertex_input || 34 intrin->intrinsic == nir_intrinsic_load_interpolated_input; 35 } 36 37 static bool 38 is_output(nir_intrinsic_instr *intrin) 39 { 40 return intrin->intrinsic == nir_intrinsic_load_output || 41 intrin->intrinsic == nir_intrinsic_load_per_vertex_output || 42 intrin->intrinsic == nir_intrinsic_store_output || 43 intrin->intrinsic == nir_intrinsic_store_per_vertex_output; 44 } 45 46 /** 47 * In many cases, we just add the base and offset together, so there's no 48 * reason to keep them separate. Sometimes, combining them is essential: 49 * if a shader only accesses part of a compound variable (such as a matrix 50 * or array), the variable's base may not actually exist in the VUE map. 51 * 52 * This pass adds constant offsets to instr->const_index[0], and resets 53 * the offset source to 0. Non-constant offsets remain unchanged - since 54 * we don't know what part of a compound variable is accessed, we allocate 55 * storage for the entire thing. 56 */ 57 58 static bool 59 add_const_offset_to_base_block(nir_block *block, nir_builder *b, 60 nir_variable_mode mode) 61 { 62 nir_foreach_instr_safe(instr, block) { 63 if (instr->type != nir_instr_type_intrinsic) 64 continue; 65 66 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 67 68 if ((mode == nir_var_shader_in && is_input(intrin)) || 69 (mode == nir_var_shader_out && is_output(intrin))) { 70 nir_src *offset = nir_get_io_offset_src(intrin); 71 nir_const_value *const_offset = nir_src_as_const_value(*offset); 72 73 if (const_offset) { 74 intrin->const_index[0] += const_offset->u32[0]; 75 b->cursor = nir_before_instr(&intrin->instr); 76 nir_instr_rewrite_src(&intrin->instr, offset, 77 nir_src_for_ssa(nir_imm_int(b, 0))); 78 } 79 } 80 } 81 return true; 82 } 83 84 static void 85 add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode) 86 { 87 nir_foreach_function(f, nir) { 88 if (f->impl) { 89 nir_builder b; 90 nir_builder_init(&b, f->impl); 91 nir_foreach_block(block, f->impl) { 92 add_const_offset_to_base_block(block, &b, mode); 93 } 94 } 95 } 96 } 97 98 static bool 99 remap_vs_attrs(nir_block *block, shader_info *nir_info) 100 { 101 nir_foreach_instr(instr, block) { 102 if (instr->type != nir_instr_type_intrinsic) 103 continue; 104 105 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 106 107 if (intrin->intrinsic == nir_intrinsic_load_input) { 108 /* Attributes come in a contiguous block, ordered by their 109 * gl_vert_attrib value. That means we can compute the slot 110 * number for an attribute by masking out the enabled attributes 111 * before it and counting the bits. 112 */ 113 int attr = intrin->const_index[0]; 114 int slot = _mesa_bitcount_64(nir_info->inputs_read & 115 BITFIELD64_MASK(attr)); 116 intrin->const_index[0] = 4 * slot; 117 } 118 } 119 return true; 120 } 121 122 static bool 123 remap_inputs_with_vue_map(nir_block *block, const struct brw_vue_map *vue_map) 124 { 125 nir_foreach_instr(instr, block) { 126 if (instr->type != nir_instr_type_intrinsic) 127 continue; 128 129 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 130 131 if (intrin->intrinsic == nir_intrinsic_load_input || 132 intrin->intrinsic == nir_intrinsic_load_per_vertex_input) { 133 int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]]; 134 assert(vue_slot != -1); 135 intrin->const_index[0] = vue_slot; 136 } 137 } 138 return true; 139 } 140 141 static bool 142 remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr, 143 GLenum primitive_mode) 144 { 145 const int location = nir_intrinsic_base(intr); 146 const unsigned component = nir_intrinsic_component(intr); 147 bool out_of_bounds; 148 149 if (location == VARYING_SLOT_TESS_LEVEL_INNER) { 150 switch (primitive_mode) { 151 case GL_QUADS: 152 /* gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */ 153 nir_intrinsic_set_base(intr, 0); 154 nir_intrinsic_set_component(intr, 3 - component); 155 out_of_bounds = false; 156 break; 157 case GL_TRIANGLES: 158 /* gl_TessLevelInner[0] lives at DWord 4. */ 159 nir_intrinsic_set_base(intr, 1); 160 out_of_bounds = component > 0; 161 break; 162 case GL_ISOLINES: 163 out_of_bounds = true; 164 break; 165 default: 166 unreachable("Bogus tessellation domain"); 167 } 168 } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) { 169 if (primitive_mode == GL_ISOLINES) { 170 /* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order). */ 171 nir_intrinsic_set_base(intr, 1); 172 nir_intrinsic_set_component(intr, 2 + nir_intrinsic_component(intr)); 173 out_of_bounds = component > 1; 174 } else { 175 /* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed) */ 176 nir_intrinsic_set_base(intr, 1); 177 nir_intrinsic_set_component(intr, 3 - nir_intrinsic_component(intr)); 178 out_of_bounds = component == 3 && primitive_mode == GL_TRIANGLES; 179 } 180 } else { 181 return false; 182 } 183 184 if (out_of_bounds) { 185 if (nir_intrinsic_infos[intr->intrinsic].has_dest) { 186 b->cursor = nir_before_instr(&intr->instr); 187 nir_ssa_def *undef = nir_ssa_undef(b, 1, 32); 188 nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(undef)); 189 } 190 nir_instr_remove(&intr->instr); 191 } 192 193 return true; 194 } 195 196 static bool 197 remap_patch_urb_offsets(nir_block *block, nir_builder *b, 198 const struct brw_vue_map *vue_map, 199 GLenum tes_primitive_mode) 200 { 201 const bool is_passthrough_tcs = b->shader->info->name && 202 strcmp(b->shader->info->name, "passthrough") == 0; 203 204 nir_foreach_instr_safe(instr, block) { 205 if (instr->type != nir_instr_type_intrinsic) 206 continue; 207 208 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 209 210 gl_shader_stage stage = b->shader->stage; 211 212 if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) || 213 (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) { 214 215 if (!is_passthrough_tcs && 216 remap_tess_levels(b, intrin, tes_primitive_mode)) 217 continue; 218 219 int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]]; 220 assert(vue_slot != -1); 221 intrin->const_index[0] = vue_slot; 222 223 nir_src *vertex = nir_get_io_vertex_index_src(intrin); 224 if (vertex) { 225 nir_const_value *const_vertex = nir_src_as_const_value(*vertex); 226 if (const_vertex) { 227 intrin->const_index[0] += const_vertex->u32[0] * 228 vue_map->num_per_vertex_slots; 229 } else { 230 b->cursor = nir_before_instr(&intrin->instr); 231 232 /* Multiply by the number of per-vertex slots. */ 233 nir_ssa_def *vertex_offset = 234 nir_imul(b, 235 nir_ssa_for_src(b, *vertex, 1), 236 nir_imm_int(b, 237 vue_map->num_per_vertex_slots)); 238 239 /* Add it to the existing offset */ 240 nir_src *offset = nir_get_io_offset_src(intrin); 241 nir_ssa_def *total_offset = 242 nir_iadd(b, vertex_offset, 243 nir_ssa_for_src(b, *offset, 1)); 244 245 nir_instr_rewrite_src(&intrin->instr, offset, 246 nir_src_for_ssa(total_offset)); 247 } 248 } 249 } 250 } 251 return true; 252 } 253 254 void 255 brw_nir_lower_vs_inputs(nir_shader *nir, 256 bool is_scalar, 257 bool use_legacy_snorm_formula, 258 const uint8_t *vs_attrib_wa_flags) 259 { 260 /* Start with the location of the variable's base. */ 261 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 262 var->data.driver_location = var->data.location; 263 } 264 265 /* Now use nir_lower_io to walk dereference chains. Attribute arrays are 266 * loaded as one vec4 or dvec4 per element (or matrix column), depending on 267 * whether it is a double-precision type or not. 268 */ 269 nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0); 270 271 /* This pass needs actual constants */ 272 nir_opt_constant_folding(nir); 273 274 add_const_offset_to_base(nir, nir_var_shader_in); 275 276 brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula, 277 vs_attrib_wa_flags); 278 279 if (is_scalar) { 280 /* Finally, translate VERT_ATTRIB_* values into the actual registers. */ 281 282 nir_foreach_function(function, nir) { 283 if (function->impl) { 284 nir_foreach_block(block, function->impl) { 285 remap_vs_attrs(block, nir->info); 286 } 287 } 288 } 289 } 290 } 291 292 void 293 brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar, 294 const struct brw_vue_map *vue_map) 295 { 296 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 297 var->data.driver_location = var->data.location; 298 } 299 300 /* Inputs are stored in vec4 slots, so use type_size_vec4(). */ 301 nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0); 302 303 if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) { 304 /* This pass needs actual constants */ 305 nir_opt_constant_folding(nir); 306 307 add_const_offset_to_base(nir, nir_var_shader_in); 308 309 nir_foreach_function(function, nir) { 310 if (function->impl) { 311 nir_foreach_block(block, function->impl) { 312 remap_inputs_with_vue_map(block, vue_map); 313 } 314 } 315 } 316 } 317 } 318 319 void 320 brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map) 321 { 322 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 323 var->data.driver_location = var->data.location; 324 } 325 326 nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0); 327 328 /* This pass needs actual constants */ 329 nir_opt_constant_folding(nir); 330 331 add_const_offset_to_base(nir, nir_var_shader_in); 332 333 nir_foreach_function(function, nir) { 334 if (function->impl) { 335 nir_builder b; 336 nir_builder_init(&b, function->impl); 337 nir_foreach_block(block, function->impl) { 338 remap_patch_urb_offsets(block, &b, vue_map, 339 nir->info->tess.primitive_mode); 340 } 341 } 342 } 343 } 344 345 void 346 brw_nir_lower_fs_inputs(nir_shader *nir, 347 const struct gen_device_info *devinfo, 348 const struct brw_wm_prog_key *key) 349 { 350 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 351 var->data.driver_location = var->data.location; 352 353 /* Apply default interpolation mode. 354 * 355 * Everything defaults to smooth except for the legacy GL color 356 * built-in variables, which might be flat depending on API state. 357 */ 358 if (var->data.interpolation == INTERP_MODE_NONE) { 359 const bool flat = key->flat_shade && 360 (var->data.location == VARYING_SLOT_COL0 || 361 var->data.location == VARYING_SLOT_COL1); 362 363 var->data.interpolation = flat ? INTERP_MODE_FLAT 364 : INTERP_MODE_SMOOTH; 365 } 366 367 /* On Ironlake and below, there is only one interpolation mode. 368 * Centroid interpolation doesn't mean anything on this hardware -- 369 * there is no multisampling. 370 */ 371 if (devinfo->gen < 6) { 372 var->data.centroid = false; 373 var->data.sample = false; 374 } 375 } 376 377 nir_lower_io_options lower_io_options = 0; 378 if (key->persample_interp) 379 lower_io_options |= nir_lower_io_force_sample_interpolation; 380 381 nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options); 382 383 /* This pass needs actual constants */ 384 nir_opt_constant_folding(nir); 385 386 add_const_offset_to_base(nir, nir_var_shader_in); 387 } 388 389 void 390 brw_nir_lower_vue_outputs(nir_shader *nir, 391 bool is_scalar) 392 { 393 nir_foreach_variable(var, &nir->outputs) { 394 var->data.driver_location = var->data.location; 395 } 396 397 nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0); 398 } 399 400 void 401 brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map, 402 GLenum tes_primitive_mode) 403 { 404 nir_foreach_variable(var, &nir->outputs) { 405 var->data.driver_location = var->data.location; 406 } 407 408 nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0); 409 410 /* This pass needs actual constants */ 411 nir_opt_constant_folding(nir); 412 413 add_const_offset_to_base(nir, nir_var_shader_out); 414 415 nir_foreach_function(function, nir) { 416 if (function->impl) { 417 nir_builder b; 418 nir_builder_init(&b, function->impl); 419 nir_foreach_block(block, function->impl) { 420 remap_patch_urb_offsets(block, &b, vue_map, tes_primitive_mode); 421 } 422 } 423 } 424 } 425 426 void 427 brw_nir_lower_fs_outputs(nir_shader *nir) 428 { 429 nir_foreach_variable(var, &nir->outputs) { 430 var->data.driver_location = 431 SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) | 432 SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION); 433 } 434 435 nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0); 436 } 437 438 void 439 brw_nir_lower_cs_shared(nir_shader *nir) 440 { 441 nir_assign_var_locations(&nir->shared, &nir->num_shared, 442 type_size_scalar_bytes); 443 nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes, 0); 444 } 445 446 #define OPT(pass, ...) ({ \ 447 bool this_progress = false; \ 448 NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ 449 if (this_progress) \ 450 progress = true; \ 451 this_progress; \ 452 }) 453 454 #define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__) 455 456 static nir_shader * 457 nir_optimize(nir_shader *nir, const struct brw_compiler *compiler, 458 bool is_scalar) 459 { 460 nir_variable_mode indirect_mask = 0; 461 if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput) 462 indirect_mask |= nir_var_shader_in; 463 if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput) 464 indirect_mask |= nir_var_shader_out; 465 if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp) 466 indirect_mask |= nir_var_local; 467 468 bool progress; 469 do { 470 progress = false; 471 OPT_V(nir_lower_vars_to_ssa); 472 OPT(nir_opt_copy_prop_vars); 473 474 if (is_scalar) { 475 OPT(nir_lower_alu_to_scalar); 476 } 477 478 OPT(nir_copy_prop); 479 480 if (is_scalar) { 481 OPT(nir_lower_phis_to_scalar); 482 } 483 484 OPT(nir_copy_prop); 485 OPT(nir_opt_dce); 486 OPT(nir_opt_cse); 487 OPT(nir_opt_peephole_select, 0); 488 OPT(nir_opt_algebraic); 489 OPT(nir_opt_constant_folding); 490 OPT(nir_opt_dead_cf); 491 if (OPT(nir_opt_trivial_continues)) { 492 /* If nir_opt_trivial_continues makes progress, then we need to clean 493 * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll 494 * to make progress. 495 */ 496 OPT(nir_copy_prop); 497 OPT(nir_opt_dce); 498 } 499 OPT(nir_opt_if); 500 if (nir->options->max_unroll_iterations != 0) { 501 OPT(nir_opt_loop_unroll, indirect_mask); 502 } 503 OPT(nir_opt_remove_phis); 504 OPT(nir_opt_undef); 505 OPT_V(nir_lower_doubles, nir_lower_drcp | 506 nir_lower_dsqrt | 507 nir_lower_drsq | 508 nir_lower_dtrunc | 509 nir_lower_dfloor | 510 nir_lower_dceil | 511 nir_lower_dfract | 512 nir_lower_dround_even | 513 nir_lower_dmod); 514 OPT_V(nir_lower_double_pack); 515 } while (progress); 516 517 return nir; 518 } 519 520 /* Does some simple lowering and runs the standard suite of optimizations 521 * 522 * This is intended to be called more-or-less directly after you get the 523 * shader out of GLSL or some other source. While it is geared towards i965, 524 * it is not at all generator-specific except for the is_scalar flag. Even 525 * there, it is safe to call with is_scalar = false for a shader that is 526 * intended for the FS backend as long as nir_optimize is called again with 527 * is_scalar = true to scalarize everything prior to code gen. 528 */ 529 nir_shader * 530 brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) 531 { 532 const struct gen_device_info *devinfo = compiler->devinfo; 533 bool progress; /* Written by OPT and OPT_V */ 534 (void)progress; 535 536 const bool is_scalar = compiler->scalar_stage[nir->stage]; 537 538 if (nir->stage == MESA_SHADER_GEOMETRY) 539 OPT(nir_lower_gs_intrinsics); 540 541 /* See also brw_nir_trig_workarounds.py */ 542 if (compiler->precise_trig && 543 !(devinfo->gen >= 10 || devinfo->is_kabylake)) 544 OPT(brw_nir_apply_trig_workarounds); 545 546 static const nir_lower_tex_options tex_options = { 547 .lower_txp = ~0, 548 .lower_txf_offset = true, 549 .lower_rect_offset = true, 550 .lower_txd_cube_map = true, 551 }; 552 553 OPT(nir_lower_tex, &tex_options); 554 OPT(nir_normalize_cubemap_coords); 555 556 OPT(nir_lower_global_vars_to_local); 557 558 OPT(nir_split_var_copies); 559 560 nir = nir_optimize(nir, compiler, is_scalar); 561 562 if (is_scalar) { 563 OPT_V(nir_lower_load_const_to_scalar); 564 } 565 566 /* Lower a bunch of stuff */ 567 OPT_V(nir_lower_var_copies); 568 569 OPT_V(nir_lower_clip_cull_distance_arrays); 570 571 nir_variable_mode indirect_mask = 0; 572 if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput) 573 indirect_mask |= nir_var_shader_in; 574 if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput) 575 indirect_mask |= nir_var_shader_out; 576 if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp) 577 indirect_mask |= nir_var_local; 578 579 nir_lower_indirect_derefs(nir, indirect_mask); 580 581 /* Get rid of split copies */ 582 nir = nir_optimize(nir, compiler, is_scalar); 583 584 OPT(nir_remove_dead_variables, nir_var_local); 585 586 return nir; 587 } 588 589 /* Prepare the given shader for codegen 590 * 591 * This function is intended to be called right before going into the actual 592 * backend and is highly backend-specific. Also, once this function has been 593 * called on a shader, it will no longer be in SSA form so most optimizations 594 * will not work. 595 */ 596 nir_shader * 597 brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, 598 bool is_scalar) 599 { 600 const struct gen_device_info *devinfo = compiler->devinfo; 601 bool debug_enabled = 602 (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage)); 603 604 bool progress; /* Written by OPT and OPT_V */ 605 (void)progress; 606 607 nir = nir_optimize(nir, compiler, is_scalar); 608 609 if (devinfo->gen >= 6) { 610 /* Try and fuse multiply-adds */ 611 OPT(brw_nir_opt_peephole_ffma); 612 } 613 614 OPT(nir_opt_algebraic_late); 615 616 OPT_V(nir_lower_to_source_mods); 617 OPT(nir_copy_prop); 618 OPT(nir_opt_dce); 619 OPT(nir_opt_move_comparisons); 620 621 OPT(nir_lower_locals_to_regs); 622 623 if (unlikely(debug_enabled)) { 624 /* Re-index SSA defs so we print more sensible numbers. */ 625 nir_foreach_function(function, nir) { 626 if (function->impl) 627 nir_index_ssa_defs(function->impl); 628 } 629 630 fprintf(stderr, "NIR (SSA form) for %s shader:\n", 631 _mesa_shader_stage_to_string(nir->stage)); 632 nir_print_shader(nir, stderr); 633 } 634 635 OPT_V(nir_convert_from_ssa, true); 636 637 if (!is_scalar) { 638 OPT_V(nir_move_vec_src_uses_to_dest); 639 OPT(nir_lower_vec_to_movs); 640 } 641 642 /* This is the last pass we run before we start emitting stuff. It 643 * determines when we need to insert boolean resolves on Gen <= 5. We 644 * run it last because it stashes data in instr->pass_flags and we don't 645 * want that to be squashed by other NIR passes. 646 */ 647 if (devinfo->gen <= 5) 648 brw_nir_analyze_boolean_resolves(nir); 649 650 nir_sweep(nir); 651 652 if (unlikely(debug_enabled)) { 653 fprintf(stderr, "NIR (final form) for %s shader:\n", 654 _mesa_shader_stage_to_string(nir->stage)); 655 nir_print_shader(nir, stderr); 656 } 657 658 return nir; 659 } 660 661 nir_shader * 662 brw_nir_apply_sampler_key(nir_shader *nir, 663 const struct brw_compiler *compiler, 664 const struct brw_sampler_prog_key_data *key_tex, 665 bool is_scalar) 666 { 667 const struct gen_device_info *devinfo = compiler->devinfo; 668 nir_lower_tex_options tex_options = { 0 }; 669 670 /* Iron Lake and prior require lowering of all rectangle textures */ 671 if (devinfo->gen < 6) 672 tex_options.lower_rect = true; 673 674 /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */ 675 if (devinfo->gen < 8) { 676 tex_options.saturate_s = key_tex->gl_clamp_mask[0]; 677 tex_options.saturate_t = key_tex->gl_clamp_mask[1]; 678 tex_options.saturate_r = key_tex->gl_clamp_mask[2]; 679 } 680 681 /* Prior to Haswell, we have to fake texture swizzle */ 682 for (unsigned s = 0; s < MAX_SAMPLERS; s++) { 683 if (key_tex->swizzles[s] == SWIZZLE_NOOP) 684 continue; 685 686 tex_options.swizzle_result |= (1 << s); 687 for (unsigned c = 0; c < 4; c++) 688 tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c); 689 } 690 691 /* Prior to Haswell, we have to lower gradients on shadow samplers */ 692 tex_options.lower_txd_shadow = devinfo->gen < 8 && !devinfo->is_haswell; 693 694 tex_options.lower_y_uv_external = key_tex->y_uv_image_mask; 695 tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask; 696 tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask; 697 698 if (nir_lower_tex(nir, &tex_options)) { 699 nir_validate_shader(nir); 700 nir = nir_optimize(nir, compiler, is_scalar); 701 } 702 703 return nir; 704 } 705 706 enum brw_reg_type 707 brw_type_for_nir_type(nir_alu_type type) 708 { 709 switch (type) { 710 case nir_type_uint: 711 case nir_type_uint32: 712 return BRW_REGISTER_TYPE_UD; 713 case nir_type_bool: 714 case nir_type_int: 715 case nir_type_bool32: 716 case nir_type_int32: 717 return BRW_REGISTER_TYPE_D; 718 case nir_type_float: 719 case nir_type_float32: 720 return BRW_REGISTER_TYPE_F; 721 case nir_type_float64: 722 return BRW_REGISTER_TYPE_DF; 723 case nir_type_int64: 724 case nir_type_uint64: 725 /* TODO we should only see these in moves, so for now it's ok, but when 726 * we add actual 64-bit integer support we should fix this. 727 */ 728 return BRW_REGISTER_TYPE_DF; 729 default: 730 unreachable("unknown type"); 731 } 732 733 return BRW_REGISTER_TYPE_F; 734 } 735 736 /* Returns the glsl_base_type corresponding to a nir_alu_type. 737 * This is used by both brw_vec4_nir and brw_fs_nir. 738 */ 739 enum glsl_base_type 740 brw_glsl_base_type_for_nir_type(nir_alu_type type) 741 { 742 switch (type) { 743 case nir_type_float: 744 case nir_type_float32: 745 return GLSL_TYPE_FLOAT; 746 747 case nir_type_float64: 748 return GLSL_TYPE_DOUBLE; 749 750 case nir_type_int: 751 case nir_type_int32: 752 return GLSL_TYPE_INT; 753 754 case nir_type_uint: 755 case nir_type_uint32: 756 return GLSL_TYPE_UINT; 757 758 default: 759 unreachable("bad type"); 760 } 761 } 762