/*
 * Copyright 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir.h"
#include "brw_shader.h"
#include "common/gen_debug.h"
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_builder.h"

static bool
is_input(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
          intrin->intrinsic == nir_intrinsic_load_interpolated_input;
}

static bool
is_output(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_store_output ||
          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
}

/**
 * In many cases, we just add the base and offset together, so there's no
 * reason to keep them separate.  Sometimes, combining them is essential:
 * if a shader only accesses part of a compound variable (such as a matrix
 * or array), the variable's base may not actually exist in the VUE map.
 *
 * This pass adds constant offsets to instr->const_index[0], and resets
 * the offset source to 0.  Non-constant offsets remain unchanged - since
 * we don't know what part of a compound variable is accessed, we allocate
 * storage for the entire thing.
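 *
 * For example, a load_input with a base of 8 and a constant offset of 2
 * becomes a load_input with a base of 10 and an offset of 0.  (The
 * numbers here are purely illustrative.)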
 */

static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode mode)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if ((mode == nir_var_shader_in && is_input(intrin)) ||
          (mode == nir_var_shader_out && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);
         nir_const_value *const_offset = nir_src_as_const_value(*offset);

         if (const_offset) {
            intrin->const_index[0] += const_offset->u32[0];
            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
         }
      }
   }
   return true;
}

static void
add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
{
   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_builder b;
         nir_builder_init(&b, f->impl);
         nir_foreach_block(block, f->impl) {
            add_const_offset_to_base_block(block, &b, mode);
         }
      }
   }
}

static bool
remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
                  GLenum primitive_mode)
{
   const int location = nir_intrinsic_base(intr);
   const unsigned component = nir_intrinsic_component(intr);
   bool out_of_bounds;

   if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
      switch (primitive_mode) {
      case GL_QUADS:
         /* gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */
         nir_intrinsic_set_base(intr, 0);
         nir_intrinsic_set_component(intr, 3 - component);
         out_of_bounds = false;
         break;
      case GL_TRIANGLES:
         /* gl_TessLevelInner[0] lives at DWord 4. */
         nir_intrinsic_set_base(intr, 1);
         out_of_bounds = component > 0;
         break;
      case GL_ISOLINES:
         out_of_bounds = true;
         break;
      default:
         unreachable("Bogus tessellation domain");
      }
   } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
      if (primitive_mode == GL_ISOLINES) {
         /* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order).
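          * With a base of 1, component 2 + c addresses DWord 4 * 1 + 2 + c,
          * i.e. DWord 6 or 7.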
          */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 2 + component);
         out_of_bounds = component > 1;
      } else {
         /* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed). */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 3 - component);
         out_of_bounds = component == 3 && primitive_mode == GL_TRIANGLES;
      }
   } else {
      return false;
   }

   if (out_of_bounds) {
      if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
         b->cursor = nir_before_instr(&intr->instr);
         nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(undef));
      }
      nir_instr_remove(&intr->instr);
   }

   return true;
}

static bool
remap_patch_urb_offsets(nir_block *block, nir_builder *b,
                        const struct brw_vue_map *vue_map,
                        GLenum tes_primitive_mode)
{
   const bool is_passthrough_tcs = b->shader->info.name &&
      strcmp(b->shader->info.name, "passthrough") == 0;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      gl_shader_stage stage = b->shader->info.stage;

      if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
          (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {

         if (!is_passthrough_tcs &&
             remap_tess_levels(b, intrin, tes_primitive_mode))
            continue;

         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;

         nir_src *vertex = nir_get_io_vertex_index_src(intrin);
         if (vertex) {
            nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
            if (const_vertex) {
               intrin->const_index[0] += const_vertex->u32[0] *
                                         vue_map->num_per_vertex_slots;
            } else {
               b->cursor = nir_before_instr(&intrin->instr);

               /* Multiply by the number of per-vertex slots. */
               nir_ssa_def *vertex_offset =
                  nir_imul(b,
                           nir_ssa_for_src(b, *vertex, 1),
                           nir_imm_int(b,
                                       vue_map->num_per_vertex_slots));

               /* Add it to the existing offset */
               nir_src *offset = nir_get_io_offset_src(intrin);
               nir_ssa_def *total_offset =
                  nir_iadd(b, vertex_offset,
                           nir_ssa_for_src(b, *offset, 1));

               nir_instr_rewrite_src(&intrin->instr, offset,
                                     nir_src_for_ssa(total_offset));
            }
         }
      }
   }
   return true;
}

void
brw_nir_lower_vs_inputs(nir_shader *nir,
                        const uint8_t *vs_attrib_wa_flags)
{
   /* Start with the location of the variable's base. */
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Now use nir_lower_io to walk dereference chains.  Attribute arrays are
    * loaded as one vec4 or dvec4 per element (or matrix column), depending on
    * whether it is a double-precision type or not.
    */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   brw_nir_apply_attribute_workarounds(nir, vs_attrib_wa_flags);

   /* The last step is to remap VERT_ATTRIB_* to actual registers */

   /* Whether or not we have any system generated values.  gl_DrawID is not
    * included here as it lives in its own vec4.
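    *
    * When any of these system values are read, the VF appends a single
    * extra vec4 (the SGVS element) after the user's vertex elements;
    * gl_DrawID, if read, occupies the vec4 after that.  The bases
    * computed below point at those slots.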
238 */ 239 const bool has_sgvs = 240 nir->info.system_values_read & 241 (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) | 242 BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) | 243 BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) | 244 BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID)); 245 246 const unsigned num_inputs = _mesa_bitcount_64(nir->info.inputs_read); 247 248 nir_foreach_function(function, nir) { 249 if (!function->impl) 250 continue; 251 252 nir_builder b; 253 nir_builder_init(&b, function->impl); 254 255 nir_foreach_block(block, function->impl) { 256 nir_foreach_instr_safe(instr, block) { 257 if (instr->type != nir_instr_type_intrinsic) 258 continue; 259 260 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 261 262 switch (intrin->intrinsic) { 263 case nir_intrinsic_load_base_vertex: 264 case nir_intrinsic_load_base_instance: 265 case nir_intrinsic_load_vertex_id_zero_base: 266 case nir_intrinsic_load_instance_id: 267 case nir_intrinsic_load_draw_id: { 268 b.cursor = nir_after_instr(&intrin->instr); 269 270 /* gl_VertexID and friends are stored by the VF as the last 271 * vertex element. We convert them to load_input intrinsics at 272 * the right location. 273 */ 274 nir_intrinsic_instr *load = 275 nir_intrinsic_instr_create(nir, nir_intrinsic_load_input); 276 load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0)); 277 278 nir_intrinsic_set_base(load, num_inputs); 279 switch (intrin->intrinsic) { 280 case nir_intrinsic_load_base_vertex: 281 nir_intrinsic_set_component(load, 0); 282 break; 283 case nir_intrinsic_load_base_instance: 284 nir_intrinsic_set_component(load, 1); 285 break; 286 case nir_intrinsic_load_vertex_id_zero_base: 287 nir_intrinsic_set_component(load, 2); 288 break; 289 case nir_intrinsic_load_instance_id: 290 nir_intrinsic_set_component(load, 3); 291 break; 292 case nir_intrinsic_load_draw_id: 293 /* gl_DrawID is stored right after gl_VertexID and friends 294 * if any of them exist. 295 */ 296 nir_intrinsic_set_base(load, num_inputs + has_sgvs); 297 nir_intrinsic_set_component(load, 0); 298 break; 299 default: 300 unreachable("Invalid system value intrinsic"); 301 } 302 303 load->num_components = 1; 304 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL); 305 nir_builder_instr_insert(&b, &load->instr); 306 307 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 308 nir_src_for_ssa(&load->dest.ssa)); 309 nir_instr_remove(&intrin->instr); 310 break; 311 } 312 313 case nir_intrinsic_load_input: { 314 /* Attributes come in a contiguous block, ordered by their 315 * gl_vert_attrib value. That means we can compute the slot 316 * number for an attribute by masking out the enabled attributes 317 * before it and counting the bits. 318 */ 319 int attr = nir_intrinsic_base(intrin); 320 int slot = _mesa_bitcount_64(nir->info.inputs_read & 321 BITFIELD64_MASK(attr)); 322 nir_intrinsic_set_base(intrin, slot); 323 break; 324 } 325 326 default: 327 break; /* Nothing to do */ 328 } 329 } 330 } 331 } 332 } 333 334 void 335 brw_nir_lower_vue_inputs(nir_shader *nir, 336 const struct brw_vue_map *vue_map) 337 { 338 foreach_list_typed(nir_variable, var, node, &nir->inputs) { 339 var->data.driver_location = var->data.location; 340 } 341 342 /* Inputs are stored in vec4 slots, so use type_size_vec4(). 
    */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic == nir_intrinsic_load_input ||
                intrin->intrinsic == nir_intrinsic_load_per_vertex_input) {
               /* Offset 0 is the VUE header, which contains
                * VARYING_SLOT_LAYER [.y], VARYING_SLOT_VIEWPORT [.z], and
                * VARYING_SLOT_PSIZ [.w].
                */
               int varying = nir_intrinsic_base(intrin);
               int vue_slot;
               switch (varying) {
               case VARYING_SLOT_PSIZ:
                  nir_intrinsic_set_base(intrin, 0);
                  nir_intrinsic_set_component(intrin, 3);
                  break;

               default:
                  vue_slot = vue_map->varying_to_slot[varying];
                  assert(vue_slot != -1);
                  nir_intrinsic_set_base(intrin, vue_slot);
                  break;
               }
            }
         }
      }
   }
}

void
brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map,
                                    nir->info.tess.primitive_mode);
         }
      }
   }
}

void
brw_nir_lower_fs_inputs(nir_shader *nir,
                        const struct gen_device_info *devinfo,
                        const struct brw_wm_prog_key *key)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;

      /* Apply default interpolation mode.
       *
       * Everything defaults to smooth except for the legacy GL color
       * built-in variables, which might be flat depending on API state.
       */
      if (var->data.interpolation == INTERP_MODE_NONE) {
         const bool flat = key->flat_shade &&
            (var->data.location == VARYING_SLOT_COL0 ||
             var->data.location == VARYING_SLOT_COL1);

         var->data.interpolation = flat ? INTERP_MODE_FLAT
                                        : INTERP_MODE_SMOOTH;
      }

      /* On Ironlake and below, there is only one interpolation mode.
       * Centroid interpolation doesn't mean anything on this hardware --
       * there is no multisampling.
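       * Clearing the centroid and sample flags here makes everything fall
       * back to ordinary per-pixel interpolation.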
438 */ 439 if (devinfo->gen < 6) { 440 var->data.centroid = false; 441 var->data.sample = false; 442 } 443 } 444 445 nir_lower_io_options lower_io_options = 0; 446 if (key->persample_interp) 447 lower_io_options |= nir_lower_io_force_sample_interpolation; 448 449 nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options); 450 451 /* This pass needs actual constants */ 452 nir_opt_constant_folding(nir); 453 454 add_const_offset_to_base(nir, nir_var_shader_in); 455 } 456 457 void 458 brw_nir_lower_vue_outputs(nir_shader *nir, 459 bool is_scalar) 460 { 461 nir_foreach_variable(var, &nir->outputs) { 462 var->data.driver_location = var->data.location; 463 } 464 465 nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0); 466 } 467 468 void 469 brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map, 470 GLenum tes_primitive_mode) 471 { 472 nir_foreach_variable(var, &nir->outputs) { 473 var->data.driver_location = var->data.location; 474 } 475 476 nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0); 477 478 /* This pass needs actual constants */ 479 nir_opt_constant_folding(nir); 480 481 add_const_offset_to_base(nir, nir_var_shader_out); 482 483 nir_foreach_function(function, nir) { 484 if (function->impl) { 485 nir_builder b; 486 nir_builder_init(&b, function->impl); 487 nir_foreach_block(block, function->impl) { 488 remap_patch_urb_offsets(block, &b, vue_map, tes_primitive_mode); 489 } 490 } 491 } 492 } 493 494 void 495 brw_nir_lower_fs_outputs(nir_shader *nir) 496 { 497 nir_foreach_variable(var, &nir->outputs) { 498 var->data.driver_location = 499 SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) | 500 SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION); 501 } 502 503 nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0); 504 } 505 506 void 507 brw_nir_lower_cs_shared(nir_shader *nir) 508 { 509 nir_assign_var_locations(&nir->shared, &nir->num_shared, 510 type_size_scalar_bytes); 511 nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes, 0); 512 } 513 514 #define OPT(pass, ...) 
({                                                    \
   bool this_progress = false;                        \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
   if (this_progress)                                 \
      progress = true;                                \
   this_progress;                                     \
})

static nir_variable_mode
brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
                         gl_shader_stage stage)
{
   nir_variable_mode indirect_mask = 0;

   if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
      indirect_mask |= nir_var_shader_in;
   if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
      indirect_mask |= nir_var_shader_out;
   if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
      indirect_mask |= nir_var_local;

   return indirect_mask;
}

nir_shader *
brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
                 bool is_scalar)
{
   nir_variable_mode indirect_mask =
      brw_nir_no_indirect_mask(compiler, nir->info.stage);

   bool progress;
   do {
      progress = false;
      OPT(nir_lower_vars_to_ssa);
      OPT(nir_opt_copy_prop_vars);

      if (is_scalar) {
         OPT(nir_lower_alu_to_scalar);
      }

      OPT(nir_copy_prop);

      if (is_scalar) {
         OPT(nir_lower_phis_to_scalar);
      }

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_peephole_select, 0);
      OPT(nir_opt_intrinsics);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);
      OPT(nir_opt_dead_cf);
      if (OPT(nir_opt_trivial_continues)) {
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         OPT(nir_copy_prop);
         OPT(nir_opt_dce);
      }
      OPT(nir_opt_if);
      if (nir->options->max_unroll_iterations != 0) {
         OPT(nir_opt_loop_unroll, indirect_mask);
      }
      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT(nir_lower_doubles, nir_lower_drcp |
                             nir_lower_dsqrt |
                             nir_lower_drsq |
                             nir_lower_dtrunc |
                             nir_lower_dfloor |
                             nir_lower_dceil |
                             nir_lower_dfract |
                             nir_lower_dround_even |
                             nir_lower_dmod);
      OPT(nir_lower_64bit_pack);
   } while (progress);

   return nir;
}

/* Does some simple lowering and runs the standard suite of optimizations
 *
 * This is intended to be called more-or-less directly after you get the
 * shader out of GLSL or some other source.  While it is geared towards i965,
 * it is not at all generator-specific except for the is_scalar flag.  Even
 * there, it is safe to call with is_scalar = false for a shader that is
 * intended for the FS backend as long as nir_optimize is called again with
 * is_scalar = true to scalarize everything prior to code gen.
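 *
 * brw_postprocess_nir (below) performs the backend-specific lowering and
 * the final out-of-SSA conversion.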
606 */ 607 nir_shader * 608 brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir) 609 { 610 const struct gen_device_info *devinfo = compiler->devinfo; 611 UNUSED bool progress; /* Written by OPT */ 612 613 const bool is_scalar = compiler->scalar_stage[nir->info.stage]; 614 615 if (nir->info.stage == MESA_SHADER_GEOMETRY) 616 OPT(nir_lower_gs_intrinsics); 617 618 /* See also brw_nir_trig_workarounds.py */ 619 if (compiler->precise_trig && 620 !(devinfo->gen >= 10 || devinfo->is_kabylake)) 621 OPT(brw_nir_apply_trig_workarounds); 622 623 static const nir_lower_tex_options tex_options = { 624 .lower_txp = ~0, 625 .lower_txf_offset = true, 626 .lower_rect_offset = true, 627 .lower_txd_cube_map = true, 628 }; 629 630 OPT(nir_lower_tex, &tex_options); 631 OPT(nir_normalize_cubemap_coords); 632 633 OPT(nir_lower_global_vars_to_local); 634 635 OPT(nir_split_var_copies); 636 637 nir = brw_nir_optimize(nir, compiler, is_scalar); 638 639 if (is_scalar) { 640 OPT(nir_lower_load_const_to_scalar); 641 } 642 643 /* Lower a bunch of stuff */ 644 OPT(nir_lower_var_copies); 645 646 OPT(nir_lower_system_values); 647 648 const nir_lower_subgroups_options subgroups_options = { 649 .subgroup_size = nir->info.stage == MESA_SHADER_COMPUTE ? 32 : 650 nir->info.stage == MESA_SHADER_FRAGMENT ? 16 : 8, 651 .ballot_bit_size = 32, 652 .lower_to_scalar = true, 653 .lower_subgroup_masks = true, 654 .lower_vote_trivial = !is_scalar, 655 }; 656 OPT(nir_lower_subgroups, &subgroups_options); 657 658 OPT(nir_lower_clip_cull_distance_arrays); 659 660 nir_variable_mode indirect_mask = 661 brw_nir_no_indirect_mask(compiler, nir->info.stage); 662 nir_lower_indirect_derefs(nir, indirect_mask); 663 664 nir_lower_int64(nir, nir_lower_imul64 | 665 nir_lower_isign64 | 666 nir_lower_divmod64); 667 668 /* Get rid of split copies */ 669 nir = brw_nir_optimize(nir, compiler, is_scalar); 670 671 OPT(nir_remove_dead_variables, nir_var_local); 672 673 return nir; 674 } 675 676 void 677 brw_nir_link_shaders(const struct brw_compiler *compiler, 678 nir_shader **producer, nir_shader **consumer) 679 { 680 NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out); 681 NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in); 682 683 if (nir_remove_unused_varyings(*producer, *consumer)) { 684 NIR_PASS_V(*producer, nir_lower_global_vars_to_local); 685 NIR_PASS_V(*consumer, nir_lower_global_vars_to_local); 686 687 /* The backend might not be able to handle indirects on 688 * temporaries so we need to lower indirects on any of the 689 * varyings we have demoted here. 690 */ 691 NIR_PASS_V(*producer, nir_lower_indirect_derefs, 692 brw_nir_no_indirect_mask(compiler, (*producer)->info.stage)); 693 NIR_PASS_V(*consumer, nir_lower_indirect_derefs, 694 brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage)); 695 696 const bool p_is_scalar = 697 compiler->scalar_stage[(*producer)->info.stage]; 698 *producer = brw_nir_optimize(*producer, compiler, p_is_scalar); 699 700 const bool c_is_scalar = 701 compiler->scalar_stage[(*producer)->info.stage]; 702 *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar); 703 } 704 } 705 706 /* Prepare the given shader for codegen 707 * 708 * This function is intended to be called right before going into the actual 709 * backend and is highly backend-specific. Also, once this function has been 710 * called on a shader, it will no longer be in SSA form so most optimizations 711 * will not work. 
712 */ 713 nir_shader * 714 brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, 715 bool is_scalar) 716 { 717 const struct gen_device_info *devinfo = compiler->devinfo; 718 bool debug_enabled = 719 (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->info.stage)); 720 721 UNUSED bool progress; /* Written by OPT */ 722 723 724 do { 725 progress = false; 726 OPT(nir_opt_algebraic_before_ffma); 727 } while (progress); 728 729 nir = brw_nir_optimize(nir, compiler, is_scalar); 730 731 if (devinfo->gen >= 6) { 732 /* Try and fuse multiply-adds */ 733 OPT(brw_nir_opt_peephole_ffma); 734 } 735 736 OPT(nir_opt_algebraic_late); 737 738 OPT(nir_lower_to_source_mods); 739 OPT(nir_copy_prop); 740 OPT(nir_opt_dce); 741 OPT(nir_opt_move_comparisons); 742 743 OPT(nir_lower_locals_to_regs); 744 745 if (unlikely(debug_enabled)) { 746 /* Re-index SSA defs so we print more sensible numbers. */ 747 nir_foreach_function(function, nir) { 748 if (function->impl) 749 nir_index_ssa_defs(function->impl); 750 } 751 752 fprintf(stderr, "NIR (SSA form) for %s shader:\n", 753 _mesa_shader_stage_to_string(nir->info.stage)); 754 nir_print_shader(nir, stderr); 755 } 756 757 OPT(nir_convert_from_ssa, true); 758 759 if (!is_scalar) { 760 OPT(nir_move_vec_src_uses_to_dest); 761 OPT(nir_lower_vec_to_movs); 762 } 763 764 /* This is the last pass we run before we start emitting stuff. It 765 * determines when we need to insert boolean resolves on Gen <= 5. We 766 * run it last because it stashes data in instr->pass_flags and we don't 767 * want that to be squashed by other NIR passes. 768 */ 769 if (devinfo->gen <= 5) 770 brw_nir_analyze_boolean_resolves(nir); 771 772 nir_sweep(nir); 773 774 if (unlikely(debug_enabled)) { 775 fprintf(stderr, "NIR (final form) for %s shader:\n", 776 _mesa_shader_stage_to_string(nir->info.stage)); 777 nir_print_shader(nir, stderr); 778 } 779 780 return nir; 781 } 782 783 nir_shader * 784 brw_nir_apply_sampler_key(nir_shader *nir, 785 const struct brw_compiler *compiler, 786 const struct brw_sampler_prog_key_data *key_tex, 787 bool is_scalar) 788 { 789 const struct gen_device_info *devinfo = compiler->devinfo; 790 nir_lower_tex_options tex_options = { 0 }; 791 792 /* Iron Lake and prior require lowering of all rectangle textures */ 793 if (devinfo->gen < 6) 794 tex_options.lower_rect = true; 795 796 /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */ 797 if (devinfo->gen < 8) { 798 tex_options.saturate_s = key_tex->gl_clamp_mask[0]; 799 tex_options.saturate_t = key_tex->gl_clamp_mask[1]; 800 tex_options.saturate_r = key_tex->gl_clamp_mask[2]; 801 } 802 803 /* Prior to Haswell, we have to fake texture swizzle */ 804 for (unsigned s = 0; s < MAX_SAMPLERS; s++) { 805 if (key_tex->swizzles[s] == SWIZZLE_NOOP) 806 continue; 807 808 tex_options.swizzle_result |= (1 << s); 809 for (unsigned c = 0; c < 4; c++) 810 tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c); 811 } 812 813 /* Prior to Haswell, we have to lower gradients on shadow samplers */ 814 tex_options.lower_txd_shadow = devinfo->gen < 8 && !devinfo->is_haswell; 815 816 tex_options.lower_y_uv_external = key_tex->y_uv_image_mask; 817 tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask; 818 tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask; 819 tex_options.lower_xy_uxvx_external = key_tex->xy_uxvx_image_mask; 820 821 if (nir_lower_tex(nir, &tex_options)) { 822 nir_validate_shader(nir); 823 nir = brw_nir_optimize(nir, compiler, is_scalar); 824 } 825 826 return nir; 827 } 828 
enum brw_reg_type
brw_type_for_nir_type(const struct gen_device_info *devinfo, nir_alu_type type)
{
   switch (type) {
   case nir_type_uint:
   case nir_type_uint32:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_bool:
   case nir_type_int:
   case nir_type_bool32:
   case nir_type_int32:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
   case nir_type_float32:
      return BRW_REGISTER_TYPE_F;
   case nir_type_float16:
      return BRW_REGISTER_TYPE_HF;
   case nir_type_float64:
      return BRW_REGISTER_TYPE_DF;
   case nir_type_int64:
      return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_Q;
   case nir_type_uint64:
      return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_UQ;
   case nir_type_int16:
      return BRW_REGISTER_TYPE_W;
   case nir_type_uint16:
      return BRW_REGISTER_TYPE_UW;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

/* Returns the glsl_base_type corresponding to a nir_alu_type.
 * This is used by both brw_vec4_nir and brw_fs_nir.
 */
enum glsl_base_type
brw_glsl_base_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_float:
   case nir_type_float32:
      return GLSL_TYPE_FLOAT;

   case nir_type_float16:
      return GLSL_TYPE_FLOAT16;

   case nir_type_float64:
      return GLSL_TYPE_DOUBLE;

   case nir_type_int:
   case nir_type_int32:
      return GLSL_TYPE_INT;

   case nir_type_uint:
   case nir_type_uint32:
      return GLSL_TYPE_UINT;

   case nir_type_int16:
      return GLSL_TYPE_INT16;

   case nir_type_uint16:
      return GLSL_TYPE_UINT16;

   default:
      unreachable("bad type");
   }
}