1 /* 2 * Copyright 2014 Connor Abbott 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Connor Abbott (cwabbott0 (at) gmail.com) 25 * 26 */ 27 28 #pragma once 29 30 #include "util/hash_table.h" 31 #include "compiler/glsl/list.h" 32 #include "GL/gl.h" /* GLenum */ 33 #include "util/list.h" 34 #include "util/ralloc.h" 35 #include "util/set.h" 36 #include "util/bitset.h" 37 #include "util/macros.h" 38 #include "compiler/nir_types.h" 39 #include "compiler/shader_enums.h" 40 #include "compiler/shader_info.h" 41 #include <stdio.h> 42 43 #include "nir_opcodes.h" 44 45 #ifdef __cplusplus 46 extern "C" { 47 #endif 48 49 struct gl_program; 50 struct gl_shader_program; 51 52 #define NIR_FALSE 0u 53 #define NIR_TRUE (~0u) 54 55 /** Defines a cast function 56 * 57 * This macro defines a cast function from in_type to out_type where 58 * out_type is some structure type that contains a field of type out_type. 59 * 60 * Note that you have to be a bit careful as the generated cast function 61 * destroys constness. 62 */ 63 #define NIR_DEFINE_CAST(name, in_type, out_type, field, \ 64 type_field, type_value) \ 65 static inline out_type * \ 66 name(const in_type *parent) \ 67 { \ 68 assert(parent && parent->type_field == type_value); \ 69 return exec_node_data(out_type, parent, field); \ 70 } 71 72 struct nir_function; 73 struct nir_shader; 74 struct nir_instr; 75 76 77 /** 78 * Description of built-in state associated with a uniform 79 * 80 * \sa nir_variable::state_slots 81 */ 82 typedef struct { 83 int tokens[5]; 84 int swizzle; 85 } nir_state_slot; 86 87 typedef enum { 88 nir_var_shader_in = (1 << 0), 89 nir_var_shader_out = (1 << 1), 90 nir_var_global = (1 << 2), 91 nir_var_local = (1 << 3), 92 nir_var_uniform = (1 << 4), 93 nir_var_shader_storage = (1 << 5), 94 nir_var_system_value = (1 << 6), 95 nir_var_param = (1 << 7), 96 nir_var_shared = (1 << 8), 97 nir_var_all = ~0, 98 } nir_variable_mode; 99 100 101 typedef union { 102 float f32[4]; 103 double f64[4]; 104 int32_t i32[4]; 105 uint32_t u32[4]; 106 int64_t i64[4]; 107 uint64_t u64[4]; 108 } nir_const_value; 109 110 typedef struct nir_constant { 111 /** 112 * Value of the constant. 113 * 114 * The field used to back the values supplied by the constant is determined 115 * by the type associated with the \c nir_variable. Constants may be 116 * scalars, vectors, or matrices. 117 */ 118 nir_const_value values[4]; 119 120 /* we could get this from the var->type but makes clone *much* easier to 121 * not have to care about the type. 122 */ 123 unsigned num_elements; 124 125 /* Array elements / Structure Fields */ 126 struct nir_constant **elements; 127 } nir_constant; 128 129 /** 130 * \brief Layout qualifiers for gl_FragDepth. 131 * 132 * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared 133 * with a layout qualifier. 134 */ 135 typedef enum { 136 nir_depth_layout_none, /**< No depth layout is specified. */ 137 nir_depth_layout_any, 138 nir_depth_layout_greater, 139 nir_depth_layout_less, 140 nir_depth_layout_unchanged 141 } nir_depth_layout; 142 143 /** 144 * Either a uniform, global variable, shader input, or shader output. Based on 145 * ir_variable - it should be easy to translate between the two. 146 */ 147 148 typedef struct nir_variable { 149 struct exec_node node; 150 151 /** 152 * Declared type of the variable 153 */ 154 const struct glsl_type *type; 155 156 /** 157 * Declared name of the variable 158 */ 159 char *name; 160 161 struct nir_variable_data { 162 /** 163 * Storage class of the variable. 164 * 165 * \sa nir_variable_mode 166 */ 167 nir_variable_mode mode; 168 169 /** 170 * Is the variable read-only? 171 * 172 * This is set for variables declared as \c const, shader inputs, 173 * and uniforms. 174 */ 175 unsigned read_only:1; 176 unsigned centroid:1; 177 unsigned sample:1; 178 unsigned patch:1; 179 unsigned invariant:1; 180 181 /** 182 * Interpolation mode for shader inputs / outputs 183 * 184 * \sa glsl_interp_mode 185 */ 186 unsigned interpolation:2; 187 188 /** 189 * \name ARB_fragment_coord_conventions 190 * @{ 191 */ 192 unsigned origin_upper_left:1; 193 unsigned pixel_center_integer:1; 194 /*@}*/ 195 196 /** 197 * If non-zero, then this variable may be packed along with other variables 198 * into a single varying slot, so this offset should be applied when 199 * accessing components. For example, an offset of 1 means that the x 200 * component of this variable is actually stored in component y of the 201 * location specified by \c location. 202 */ 203 unsigned location_frac:2; 204 205 /** 206 * If true, this variable represents an array of scalars that should 207 * be tightly packed. In other words, consecutive array elements 208 * should be stored one component apart, rather than one slot apart. 209 */ 210 bool compact:1; 211 212 /** 213 * Whether this is a fragment shader output implicitly initialized with 214 * the previous contents of the specified render target at the 215 * framebuffer location corresponding to this shader invocation. 216 */ 217 unsigned fb_fetch_output:1; 218 219 /** 220 * \brief Layout qualifier for gl_FragDepth. 221 * 222 * This is not equal to \c ir_depth_layout_none if and only if this 223 * variable is \c gl_FragDepth and a layout qualifier is specified. 224 */ 225 nir_depth_layout depth_layout; 226 227 /** 228 * Storage location of the base of this variable 229 * 230 * The precise meaning of this field depends on the nature of the variable. 231 * 232 * - Vertex shader input: one of the values from \c gl_vert_attrib. 233 * - Vertex shader output: one of the values from \c gl_varying_slot. 234 * - Geometry shader input: one of the values from \c gl_varying_slot. 235 * - Geometry shader output: one of the values from \c gl_varying_slot. 236 * - Fragment shader input: one of the values from \c gl_varying_slot. 237 * - Fragment shader output: one of the values from \c gl_frag_result. 238 * - Uniforms: Per-stage uniform slot number for default uniform block. 239 * - Uniforms: Index within the uniform block definition for UBO members. 240 * - Non-UBO Uniforms: uniform slot number. 241 * - Other: This field is not currently used. 242 * 243 * If the variable is a uniform, shader input, or shader output, and the 244 * slot has not been assigned, the value will be -1. 245 */ 246 int location; 247 248 /** 249 * The actual location of the variable in the IR. Only valid for inputs 250 * and outputs. 251 */ 252 unsigned int driver_location; 253 254 /** 255 * output index for dual source blending. 256 */ 257 int index; 258 259 /** 260 * Descriptor set binding for sampler or UBO. 261 */ 262 int descriptor_set; 263 264 /** 265 * Initial binding point for a sampler or UBO. 266 * 267 * For array types, this represents the binding point for the first element. 268 */ 269 int binding; 270 271 /** 272 * Location an atomic counter is stored at. 273 */ 274 unsigned offset; 275 276 /** 277 * ARB_shader_image_load_store qualifiers. 278 */ 279 struct { 280 bool read_only; /**< "readonly" qualifier. */ 281 bool write_only; /**< "writeonly" qualifier. */ 282 bool coherent; 283 bool _volatile; 284 bool restrict_flag; 285 286 /** Image internal format if specified explicitly, otherwise GL_NONE. */ 287 GLenum format; 288 } image; 289 } data; 290 291 /** 292 * Built-in state that backs this uniform 293 * 294 * Once set at variable creation, \c state_slots must remain invariant. 295 * This is because, ideally, this array would be shared by all clones of 296 * this variable in the IR tree. In other words, we'd really like for it 297 * to be a fly-weight. 298 * 299 * If the variable is not a uniform, \c num_state_slots will be zero and 300 * \c state_slots will be \c NULL. 301 */ 302 /*@{*/ 303 unsigned num_state_slots; /**< Number of state slots used */ 304 nir_state_slot *state_slots; /**< State descriptors. */ 305 /*@}*/ 306 307 /** 308 * Constant expression assigned in the initializer of the variable 309 * 310 * This field should only be used temporarily by creators of NIR shaders 311 * and then lower_constant_initializers can be used to get rid of them. 312 * Most of the rest of NIR ignores this field or asserts that it's NULL. 313 */ 314 nir_constant *constant_initializer; 315 316 /** 317 * For variables that are in an interface block or are an instance of an 318 * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. 319 * 320 * \sa ir_variable::location 321 */ 322 const struct glsl_type *interface_type; 323 } nir_variable; 324 325 #define nir_foreach_variable(var, var_list) \ 326 foreach_list_typed(nir_variable, var, node, var_list) 327 328 #define nir_foreach_variable_safe(var, var_list) \ 329 foreach_list_typed_safe(nir_variable, var, node, var_list) 330 331 static inline bool 332 nir_variable_is_global(const nir_variable *var) 333 { 334 return var->data.mode != nir_var_local && var->data.mode != nir_var_param; 335 } 336 337 typedef struct nir_register { 338 struct exec_node node; 339 340 unsigned num_components; /** < number of vector components */ 341 unsigned num_array_elems; /** < size of array (0 for no array) */ 342 343 /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 344 uint8_t bit_size; 345 346 /** generic register index. */ 347 unsigned index; 348 349 /** only for debug purposes, can be NULL */ 350 const char *name; 351 352 /** whether this register is local (per-function) or global (per-shader) */ 353 bool is_global; 354 355 /** 356 * If this flag is set to true, then accessing channels >= num_components 357 * is well-defined, and simply spills over to the next array element. This 358 * is useful for backends that can do per-component accessing, in 359 * particular scalar backends. By setting this flag and making 360 * num_components equal to 1, structures can be packed tightly into 361 * registers and then registers can be accessed per-component to get to 362 * each structure member, even if it crosses vec4 boundaries. 363 */ 364 bool is_packed; 365 366 /** set of nir_src's where this register is used (read from) */ 367 struct list_head uses; 368 369 /** set of nir_dest's where this register is defined (written to) */ 370 struct list_head defs; 371 372 /** set of nir_if's where this register is used as a condition */ 373 struct list_head if_uses; 374 } nir_register; 375 376 #define nir_foreach_register(reg, reg_list) \ 377 foreach_list_typed(nir_register, reg, node, reg_list) 378 #define nir_foreach_register_safe(reg, reg_list) \ 379 foreach_list_typed_safe(nir_register, reg, node, reg_list) 380 381 typedef enum { 382 nir_instr_type_alu, 383 nir_instr_type_call, 384 nir_instr_type_tex, 385 nir_instr_type_intrinsic, 386 nir_instr_type_load_const, 387 nir_instr_type_jump, 388 nir_instr_type_ssa_undef, 389 nir_instr_type_phi, 390 nir_instr_type_parallel_copy, 391 } nir_instr_type; 392 393 typedef struct nir_instr { 394 struct exec_node node; 395 nir_instr_type type; 396 struct nir_block *block; 397 398 /** generic instruction index. */ 399 unsigned index; 400 401 /* A temporary for optimization and analysis passes to use for storing 402 * flags. For instance, DCE uses this to store the "dead/live" info. 403 */ 404 uint8_t pass_flags; 405 } nir_instr; 406 407 static inline nir_instr * 408 nir_instr_next(nir_instr *instr) 409 { 410 struct exec_node *next = exec_node_get_next(&instr->node); 411 if (exec_node_is_tail_sentinel(next)) 412 return NULL; 413 else 414 return exec_node_data(nir_instr, next, node); 415 } 416 417 static inline nir_instr * 418 nir_instr_prev(nir_instr *instr) 419 { 420 struct exec_node *prev = exec_node_get_prev(&instr->node); 421 if (exec_node_is_head_sentinel(prev)) 422 return NULL; 423 else 424 return exec_node_data(nir_instr, prev, node); 425 } 426 427 static inline bool 428 nir_instr_is_first(nir_instr *instr) 429 { 430 return exec_node_is_head_sentinel(exec_node_get_prev(&instr->node)); 431 } 432 433 static inline bool 434 nir_instr_is_last(nir_instr *instr) 435 { 436 return exec_node_is_tail_sentinel(exec_node_get_next(&instr->node)); 437 } 438 439 typedef struct nir_ssa_def { 440 /** for debugging only, can be NULL */ 441 const char* name; 442 443 /** generic SSA definition index. */ 444 unsigned index; 445 446 /** Index into the live_in and live_out bitfields */ 447 unsigned live_index; 448 449 nir_instr *parent_instr; 450 451 /** set of nir_instr's where this register is used (read from) */ 452 struct list_head uses; 453 454 /** set of nir_if's where this register is used as a condition */ 455 struct list_head if_uses; 456 457 uint8_t num_components; 458 459 /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */ 460 uint8_t bit_size; 461 } nir_ssa_def; 462 463 struct nir_src; 464 465 typedef struct { 466 nir_register *reg; 467 struct nir_src *indirect; /** < NULL for no indirect offset */ 468 unsigned base_offset; 469 470 /* TODO use-def chain goes here */ 471 } nir_reg_src; 472 473 typedef struct { 474 nir_instr *parent_instr; 475 struct list_head def_link; 476 477 nir_register *reg; 478 struct nir_src *indirect; /** < NULL for no indirect offset */ 479 unsigned base_offset; 480 481 /* TODO def-use chain goes here */ 482 } nir_reg_dest; 483 484 struct nir_if; 485 486 typedef struct nir_src { 487 union { 488 nir_instr *parent_instr; 489 struct nir_if *parent_if; 490 }; 491 492 struct list_head use_link; 493 494 union { 495 nir_reg_src reg; 496 nir_ssa_def *ssa; 497 }; 498 499 bool is_ssa; 500 } nir_src; 501 502 static inline nir_src 503 nir_src_init(void) 504 { 505 nir_src src = { { NULL } }; 506 return src; 507 } 508 509 #define NIR_SRC_INIT nir_src_init() 510 511 #define nir_foreach_use(src, reg_or_ssa_def) \ 512 list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 513 514 #define nir_foreach_use_safe(src, reg_or_ssa_def) \ 515 list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) 516 517 #define nir_foreach_if_use(src, reg_or_ssa_def) \ 518 list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 519 520 #define nir_foreach_if_use_safe(src, reg_or_ssa_def) \ 521 list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) 522 523 typedef struct { 524 union { 525 nir_reg_dest reg; 526 nir_ssa_def ssa; 527 }; 528 529 bool is_ssa; 530 } nir_dest; 531 532 static inline nir_dest 533 nir_dest_init(void) 534 { 535 nir_dest dest = { { { NULL } } }; 536 return dest; 537 } 538 539 #define NIR_DEST_INIT nir_dest_init() 540 541 #define nir_foreach_def(dest, reg) \ 542 list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) 543 544 #define nir_foreach_def_safe(dest, reg) \ 545 list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) 546 547 static inline nir_src 548 nir_src_for_ssa(nir_ssa_def *def) 549 { 550 nir_src src = NIR_SRC_INIT; 551 552 src.is_ssa = true; 553 src.ssa = def; 554 555 return src; 556 } 557 558 static inline nir_src 559 nir_src_for_reg(nir_register *reg) 560 { 561 nir_src src = NIR_SRC_INIT; 562 563 src.is_ssa = false; 564 src.reg.reg = reg; 565 src.reg.indirect = NULL; 566 src.reg.base_offset = 0; 567 568 return src; 569 } 570 571 static inline nir_dest 572 nir_dest_for_reg(nir_register *reg) 573 { 574 nir_dest dest = NIR_DEST_INIT; 575 576 dest.reg.reg = reg; 577 578 return dest; 579 } 580 581 static inline unsigned 582 nir_src_bit_size(nir_src src) 583 { 584 return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size; 585 } 586 587 static inline unsigned 588 nir_dest_bit_size(nir_dest dest) 589 { 590 return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size; 591 } 592 593 void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); 594 void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); 595 596 typedef struct { 597 nir_src src; 598 599 /** 600 * \name input modifiers 601 */ 602 /*@{*/ 603 /** 604 * For inputs interpreted as floating point, flips the sign bit. For 605 * inputs interpreted as integers, performs the two's complement negation. 606 */ 607 bool negate; 608 609 /** 610 * Clears the sign bit for floating point values, and computes the integer 611 * absolute value for integers. Note that the negate modifier acts after 612 * the absolute value modifier, therefore if both are set then all inputs 613 * will become negative. 614 */ 615 bool abs; 616 /*@}*/ 617 618 /** 619 * For each input component, says which component of the register it is 620 * chosen from. Note that which elements of the swizzle are used and which 621 * are ignored are based on the write mask for most opcodes - for example, 622 * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and 623 * a swizzle of {2, x, 1, 0} where x means "don't care." 624 */ 625 uint8_t swizzle[4]; 626 } nir_alu_src; 627 628 typedef struct { 629 nir_dest dest; 630 631 /** 632 * \name saturate output modifier 633 * 634 * Only valid for opcodes that output floating-point numbers. Clamps the 635 * output to between 0.0 and 1.0 inclusive. 636 */ 637 638 bool saturate; 639 640 unsigned write_mask : 4; /* ignored if dest.is_ssa is true */ 641 } nir_alu_dest; 642 643 typedef enum { 644 nir_type_invalid = 0, /* Not a valid type */ 645 nir_type_float, 646 nir_type_int, 647 nir_type_uint, 648 nir_type_bool, 649 nir_type_bool32 = 32 | nir_type_bool, 650 nir_type_int8 = 8 | nir_type_int, 651 nir_type_int16 = 16 | nir_type_int, 652 nir_type_int32 = 32 | nir_type_int, 653 nir_type_int64 = 64 | nir_type_int, 654 nir_type_uint8 = 8 | nir_type_uint, 655 nir_type_uint16 = 16 | nir_type_uint, 656 nir_type_uint32 = 32 | nir_type_uint, 657 nir_type_uint64 = 64 | nir_type_uint, 658 nir_type_float16 = 16 | nir_type_float, 659 nir_type_float32 = 32 | nir_type_float, 660 nir_type_float64 = 64 | nir_type_float, 661 } nir_alu_type; 662 663 #define NIR_ALU_TYPE_SIZE_MASK 0xfffffff8 664 #define NIR_ALU_TYPE_BASE_TYPE_MASK 0x00000007 665 666 static inline unsigned 667 nir_alu_type_get_type_size(nir_alu_type type) 668 { 669 return type & NIR_ALU_TYPE_SIZE_MASK; 670 } 671 672 static inline unsigned 673 nir_alu_type_get_base_type(nir_alu_type type) 674 { 675 return type & NIR_ALU_TYPE_BASE_TYPE_MASK; 676 } 677 678 static inline nir_alu_type 679 nir_get_nir_type_for_glsl_type(const struct glsl_type *type) 680 { 681 switch (glsl_get_base_type(type)) { 682 case GLSL_TYPE_BOOL: 683 return nir_type_bool32; 684 break; 685 case GLSL_TYPE_UINT: 686 return nir_type_uint32; 687 break; 688 case GLSL_TYPE_INT: 689 return nir_type_int32; 690 break; 691 case GLSL_TYPE_FLOAT: 692 return nir_type_float32; 693 break; 694 case GLSL_TYPE_DOUBLE: 695 return nir_type_float64; 696 break; 697 default: 698 unreachable("unknown type"); 699 } 700 } 701 702 nir_op nir_type_conversion_op(nir_alu_type src, nir_alu_type dst); 703 704 typedef enum { 705 NIR_OP_IS_COMMUTATIVE = (1 << 0), 706 NIR_OP_IS_ASSOCIATIVE = (1 << 1), 707 } nir_op_algebraic_property; 708 709 typedef struct { 710 const char *name; 711 712 unsigned num_inputs; 713 714 /** 715 * The number of components in the output 716 * 717 * If non-zero, this is the size of the output and input sizes are 718 * explicitly given; swizzle and writemask are still in effect, but if 719 * the output component is masked out, then the input component may 720 * still be in use. 721 * 722 * If zero, the opcode acts in the standard, per-component manner; the 723 * operation is performed on each component (except the ones that are 724 * masked out) with the input being taken from the input swizzle for 725 * that component. 726 * 727 * The size of some of the inputs may be given (i.e. non-zero) even 728 * though output_size is zero; in that case, the inputs with a zero 729 * size act per-component, while the inputs with non-zero size don't. 730 */ 731 unsigned output_size; 732 733 /** 734 * The type of vector that the instruction outputs. Note that the 735 * staurate modifier is only allowed on outputs with the float type. 736 */ 737 738 nir_alu_type output_type; 739 740 /** 741 * The number of components in each input 742 */ 743 unsigned input_sizes[4]; 744 745 /** 746 * The type of vector that each input takes. Note that negate and 747 * absolute value are only allowed on inputs with int or float type and 748 * behave differently on the two. 749 */ 750 nir_alu_type input_types[4]; 751 752 nir_op_algebraic_property algebraic_properties; 753 } nir_op_info; 754 755 extern const nir_op_info nir_op_infos[nir_num_opcodes]; 756 757 typedef struct nir_alu_instr { 758 nir_instr instr; 759 nir_op op; 760 761 /** Indicates that this ALU instruction generates an exact value 762 * 763 * This is kind of a mixture of GLSL "precise" and "invariant" and not 764 * really equivalent to either. This indicates that the value generated by 765 * this operation is high-precision and any code transformations that touch 766 * it must ensure that the resulting value is bit-for-bit identical to the 767 * original. 768 */ 769 bool exact; 770 771 nir_alu_dest dest; 772 nir_alu_src src[]; 773 } nir_alu_instr; 774 775 void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, 776 nir_alu_instr *instr); 777 void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, 778 nir_alu_instr *instr); 779 780 /* is this source channel used? */ 781 static inline bool 782 nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel) 783 { 784 if (nir_op_infos[instr->op].input_sizes[src] > 0) 785 return channel < nir_op_infos[instr->op].input_sizes[src]; 786 787 return (instr->dest.write_mask >> channel) & 1; 788 } 789 790 /* 791 * For instructions whose destinations are SSA, get the number of channels 792 * used for a source 793 */ 794 static inline unsigned 795 nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src) 796 { 797 assert(instr->dest.dest.is_ssa); 798 799 if (nir_op_infos[instr->op].input_sizes[src] > 0) 800 return nir_op_infos[instr->op].input_sizes[src]; 801 802 return instr->dest.dest.ssa.num_components; 803 } 804 805 bool nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, 806 unsigned src1, unsigned src2); 807 808 typedef enum { 809 nir_deref_type_var, 810 nir_deref_type_array, 811 nir_deref_type_struct 812 } nir_deref_type; 813 814 typedef struct nir_deref { 815 nir_deref_type deref_type; 816 struct nir_deref *child; 817 const struct glsl_type *type; 818 } nir_deref; 819 820 typedef struct { 821 nir_deref deref; 822 823 nir_variable *var; 824 } nir_deref_var; 825 826 /* This enum describes how the array is referenced. If the deref is 827 * direct then the base_offset is used. If the deref is indirect then 828 * offset is given by base_offset + indirect. If the deref is a wildcard 829 * then the deref refers to all of the elements of the array at the same 830 * time. Wildcard dereferences are only ever allowed in copy_var 831 * intrinsics and the source and destination derefs must have matching 832 * wildcards. 833 */ 834 typedef enum { 835 nir_deref_array_type_direct, 836 nir_deref_array_type_indirect, 837 nir_deref_array_type_wildcard, 838 } nir_deref_array_type; 839 840 typedef struct { 841 nir_deref deref; 842 843 nir_deref_array_type deref_array_type; 844 unsigned base_offset; 845 nir_src indirect; 846 } nir_deref_array; 847 848 typedef struct { 849 nir_deref deref; 850 851 unsigned index; 852 } nir_deref_struct; 853 854 NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref, 855 deref_type, nir_deref_type_var) 856 NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref, 857 deref_type, nir_deref_type_array) 858 NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref, 859 deref_type, nir_deref_type_struct) 860 861 /* Returns the last deref in the chain. */ 862 static inline nir_deref * 863 nir_deref_tail(nir_deref *deref) 864 { 865 while (deref->child) 866 deref = deref->child; 867 return deref; 868 } 869 870 typedef struct { 871 nir_instr instr; 872 873 unsigned num_params; 874 nir_deref_var **params; 875 nir_deref_var *return_deref; 876 877 struct nir_function *callee; 878 } nir_call_instr; 879 880 #define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \ 881 num_variables, num_indices, idx0, idx1, idx2, flags) \ 882 nir_intrinsic_##name, 883 884 #define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name, 885 886 typedef enum { 887 #include "nir_intrinsics.h" 888 nir_num_intrinsics = nir_last_intrinsic + 1 889 } nir_intrinsic_op; 890 891 #define NIR_INTRINSIC_MAX_CONST_INDEX 3 892 893 /** Represents an intrinsic 894 * 895 * An intrinsic is an instruction type for handling things that are 896 * more-or-less regular operations but don't just consume and produce SSA 897 * values like ALU operations do. Intrinsics are not for things that have 898 * special semantic meaning such as phi nodes and parallel copies. 899 * Examples of intrinsics include variable load/store operations, system 900 * value loads, and the like. Even though texturing more-or-less falls 901 * under this category, texturing is its own instruction type because 902 * trying to represent texturing with intrinsics would lead to a 903 * combinatorial explosion of intrinsic opcodes. 904 * 905 * By having a single instruction type for handling a lot of different 906 * cases, optimization passes can look for intrinsics and, for the most 907 * part, completely ignore them. Each intrinsic type also has a few 908 * possible flags that govern whether or not they can be reordered or 909 * eliminated. That way passes like dead code elimination can still work 910 * on intrisics without understanding the meaning of each. 911 * 912 * Each intrinsic has some number of constant indices, some number of 913 * variables, and some number of sources. What these sources, variables, 914 * and indices mean depends on the intrinsic and is documented with the 915 * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture 916 * instructions are the only types of instruction that can operate on 917 * variables. 918 */ 919 typedef struct { 920 nir_instr instr; 921 922 nir_intrinsic_op intrinsic; 923 924 nir_dest dest; 925 926 /** number of components if this is a vectorized intrinsic 927 * 928 * Similarly to ALU operations, some intrinsics are vectorized. 929 * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0. 930 * For vectorized intrinsics, the num_components field specifies the 931 * number of destination components and the number of source components 932 * for all sources with nir_intrinsic_infos.src_components[i] == 0. 933 */ 934 uint8_t num_components; 935 936 int const_index[NIR_INTRINSIC_MAX_CONST_INDEX]; 937 938 nir_deref_var *variables[2]; 939 940 nir_src src[]; 941 } nir_intrinsic_instr; 942 943 /** 944 * \name NIR intrinsics semantic flags 945 * 946 * information about what the compiler can do with the intrinsics. 947 * 948 * \sa nir_intrinsic_info::flags 949 */ 950 typedef enum { 951 /** 952 * whether the intrinsic can be safely eliminated if none of its output 953 * value is not being used. 954 */ 955 NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0), 956 957 /** 958 * Whether the intrinsic can be reordered with respect to any other 959 * intrinsic, i.e. whether the only reordering dependencies of the 960 * intrinsic are due to the register reads/writes. 961 */ 962 NIR_INTRINSIC_CAN_REORDER = (1 << 1), 963 } nir_intrinsic_semantic_flag; 964 965 /** 966 * \name NIR intrinsics const-index flag 967 * 968 * Indicates the usage of a const_index slot. 969 * 970 * \sa nir_intrinsic_info::index_map 971 */ 972 typedef enum { 973 /** 974 * Generally instructions that take a offset src argument, can encode 975 * a constant 'base' value which is added to the offset. 976 */ 977 NIR_INTRINSIC_BASE = 1, 978 979 /** 980 * For store instructions, a writemask for the store. 981 */ 982 NIR_INTRINSIC_WRMASK = 2, 983 984 /** 985 * The stream-id for GS emit_vertex/end_primitive intrinsics. 986 */ 987 NIR_INTRINSIC_STREAM_ID = 3, 988 989 /** 990 * The clip-plane id for load_user_clip_plane intrinsic. 991 */ 992 NIR_INTRINSIC_UCP_ID = 4, 993 994 /** 995 * The amount of data, starting from BASE, that this instruction may 996 * access. This is used to provide bounds if the offset is not constant. 997 */ 998 NIR_INTRINSIC_RANGE = 5, 999 1000 /** 1001 * The Vulkan descriptor set for vulkan_resource_index intrinsic. 1002 */ 1003 NIR_INTRINSIC_DESC_SET = 6, 1004 1005 /** 1006 * The Vulkan descriptor set binding for vulkan_resource_index intrinsic. 1007 */ 1008 NIR_INTRINSIC_BINDING = 7, 1009 1010 /** 1011 * Component offset. 1012 */ 1013 NIR_INTRINSIC_COMPONENT = 8, 1014 1015 /** 1016 * Interpolation mode (only meaningful for FS inputs). 1017 */ 1018 NIR_INTRINSIC_INTERP_MODE = 9, 1019 1020 NIR_INTRINSIC_NUM_INDEX_FLAGS, 1021 1022 } nir_intrinsic_index_flag; 1023 1024 #define NIR_INTRINSIC_MAX_INPUTS 4 1025 1026 typedef struct { 1027 const char *name; 1028 1029 unsigned num_srcs; /** < number of register/SSA inputs */ 1030 1031 /** number of components of each input register 1032 * 1033 * If this value is 0, the number of components is given by the 1034 * num_components field of nir_intrinsic_instr. 1035 */ 1036 unsigned src_components[NIR_INTRINSIC_MAX_INPUTS]; 1037 1038 bool has_dest; 1039 1040 /** number of components of the output register 1041 * 1042 * If this value is 0, the number of components is given by the 1043 * num_components field of nir_intrinsic_instr. 1044 */ 1045 unsigned dest_components; 1046 1047 /** the number of inputs/outputs that are variables */ 1048 unsigned num_variables; 1049 1050 /** the number of constant indices used by the intrinsic */ 1051 unsigned num_indices; 1052 1053 /** indicates the usage of intr->const_index[n] */ 1054 unsigned index_map[NIR_INTRINSIC_NUM_INDEX_FLAGS]; 1055 1056 /** semantic flags for calls to this intrinsic */ 1057 nir_intrinsic_semantic_flag flags; 1058 } nir_intrinsic_info; 1059 1060 extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; 1061 1062 1063 #define INTRINSIC_IDX_ACCESSORS(name, flag, type) \ 1064 static inline type \ 1065 nir_intrinsic_##name(nir_intrinsic_instr *instr) \ 1066 { \ 1067 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ 1068 assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ 1069 return instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1]; \ 1070 } \ 1071 static inline void \ 1072 nir_intrinsic_set_##name(nir_intrinsic_instr *instr, type val) \ 1073 { \ 1074 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; \ 1075 assert(info->index_map[NIR_INTRINSIC_##flag] > 0); \ 1076 instr->const_index[info->index_map[NIR_INTRINSIC_##flag] - 1] = val; \ 1077 } 1078 1079 INTRINSIC_IDX_ACCESSORS(write_mask, WRMASK, unsigned) 1080 INTRINSIC_IDX_ACCESSORS(base, BASE, int) 1081 INTRINSIC_IDX_ACCESSORS(stream_id, STREAM_ID, unsigned) 1082 INTRINSIC_IDX_ACCESSORS(ucp_id, UCP_ID, unsigned) 1083 INTRINSIC_IDX_ACCESSORS(range, RANGE, unsigned) 1084 INTRINSIC_IDX_ACCESSORS(desc_set, DESC_SET, unsigned) 1085 INTRINSIC_IDX_ACCESSORS(binding, BINDING, unsigned) 1086 INTRINSIC_IDX_ACCESSORS(component, COMPONENT, unsigned) 1087 INTRINSIC_IDX_ACCESSORS(interp_mode, INTERP_MODE, unsigned) 1088 1089 /** 1090 * \group texture information 1091 * 1092 * This gives semantic information about textures which is useful to the 1093 * frontend, the backend, and lowering passes, but not the optimizer. 1094 */ 1095 1096 typedef enum { 1097 nir_tex_src_coord, 1098 nir_tex_src_projector, 1099 nir_tex_src_comparator, /* shadow comparator */ 1100 nir_tex_src_offset, 1101 nir_tex_src_bias, 1102 nir_tex_src_lod, 1103 nir_tex_src_ms_index, /* MSAA sample index */ 1104 nir_tex_src_ms_mcs, /* MSAA compression value */ 1105 nir_tex_src_ddx, 1106 nir_tex_src_ddy, 1107 nir_tex_src_texture_offset, /* < dynamically uniform indirect offset */ 1108 nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ 1109 nir_tex_src_plane, /* < selects plane for planar textures */ 1110 nir_num_tex_src_types 1111 } nir_tex_src_type; 1112 1113 typedef struct { 1114 nir_src src; 1115 nir_tex_src_type src_type; 1116 } nir_tex_src; 1117 1118 typedef enum { 1119 nir_texop_tex, /**< Regular texture look-up */ 1120 nir_texop_txb, /**< Texture look-up with LOD bias */ 1121 nir_texop_txl, /**< Texture look-up with explicit LOD */ 1122 nir_texop_txd, /**< Texture look-up with partial derivatvies */ 1123 nir_texop_txf, /**< Texel fetch with explicit LOD */ 1124 nir_texop_txf_ms, /**< Multisample texture fetch */ 1125 nir_texop_txf_ms_mcs, /**< Multisample compression value fetch */ 1126 nir_texop_txs, /**< Texture size */ 1127 nir_texop_lod, /**< Texture lod query */ 1128 nir_texop_tg4, /**< Texture gather */ 1129 nir_texop_query_levels, /**< Texture levels query */ 1130 nir_texop_texture_samples, /**< Texture samples query */ 1131 nir_texop_samples_identical, /**< Query whether all samples are definitely 1132 * identical. 1133 */ 1134 } nir_texop; 1135 1136 typedef struct { 1137 nir_instr instr; 1138 1139 enum glsl_sampler_dim sampler_dim; 1140 nir_alu_type dest_type; 1141 1142 nir_texop op; 1143 nir_dest dest; 1144 nir_tex_src *src; 1145 unsigned num_srcs, coord_components; 1146 bool is_array, is_shadow; 1147 1148 /** 1149 * If is_shadow is true, whether this is the old-style shadow that outputs 4 1150 * components or the new-style shadow that outputs 1 component. 1151 */ 1152 bool is_new_style_shadow; 1153 1154 /* gather component selector */ 1155 unsigned component : 2; 1156 1157 /** The texture index 1158 * 1159 * If this texture instruction has a nir_tex_src_texture_offset source, 1160 * then the texture index is given by texture_index + texture_offset. 1161 */ 1162 unsigned texture_index; 1163 1164 /** The size of the texture array or 0 if it's not an array */ 1165 unsigned texture_array_size; 1166 1167 /** The texture deref 1168 * 1169 * If this is null, use texture_index instead. 1170 */ 1171 nir_deref_var *texture; 1172 1173 /** The sampler index 1174 * 1175 * The following operations do not require a sampler and, as such, this 1176 * field should be ignored: 1177 * - nir_texop_txf 1178 * - nir_texop_txf_ms 1179 * - nir_texop_txs 1180 * - nir_texop_lod 1181 * - nir_texop_tg4 1182 * - nir_texop_query_levels 1183 * - nir_texop_texture_samples 1184 * - nir_texop_samples_identical 1185 * 1186 * If this texture instruction has a nir_tex_src_sampler_offset source, 1187 * then the sampler index is given by sampler_index + sampler_offset. 1188 */ 1189 unsigned sampler_index; 1190 1191 /** The sampler deref 1192 * 1193 * If this is null, use sampler_index instead. 1194 */ 1195 nir_deref_var *sampler; 1196 } nir_tex_instr; 1197 1198 static inline unsigned 1199 nir_tex_instr_dest_size(nir_tex_instr *instr) 1200 { 1201 switch (instr->op) { 1202 case nir_texop_txs: { 1203 unsigned ret; 1204 switch (instr->sampler_dim) { 1205 case GLSL_SAMPLER_DIM_1D: 1206 case GLSL_SAMPLER_DIM_BUF: 1207 ret = 1; 1208 break; 1209 case GLSL_SAMPLER_DIM_2D: 1210 case GLSL_SAMPLER_DIM_CUBE: 1211 case GLSL_SAMPLER_DIM_MS: 1212 case GLSL_SAMPLER_DIM_RECT: 1213 case GLSL_SAMPLER_DIM_EXTERNAL: 1214 case GLSL_SAMPLER_DIM_SUBPASS: 1215 ret = 2; 1216 break; 1217 case GLSL_SAMPLER_DIM_3D: 1218 ret = 3; 1219 break; 1220 default: 1221 unreachable("not reached"); 1222 } 1223 if (instr->is_array) 1224 ret++; 1225 return ret; 1226 } 1227 1228 case nir_texop_lod: 1229 return 2; 1230 1231 case nir_texop_texture_samples: 1232 case nir_texop_query_levels: 1233 case nir_texop_samples_identical: 1234 return 1; 1235 1236 default: 1237 if (instr->is_shadow && instr->is_new_style_shadow) 1238 return 1; 1239 1240 return 4; 1241 } 1242 } 1243 1244 /* Returns true if this texture operation queries something about the texture 1245 * rather than actually sampling it. 1246 */ 1247 static inline bool 1248 nir_tex_instr_is_query(nir_tex_instr *instr) 1249 { 1250 switch (instr->op) { 1251 case nir_texop_txs: 1252 case nir_texop_lod: 1253 case nir_texop_texture_samples: 1254 case nir_texop_query_levels: 1255 case nir_texop_txf_ms_mcs: 1256 return true; 1257 case nir_texop_tex: 1258 case nir_texop_txb: 1259 case nir_texop_txl: 1260 case nir_texop_txd: 1261 case nir_texop_txf: 1262 case nir_texop_txf_ms: 1263 case nir_texop_tg4: 1264 return false; 1265 default: 1266 unreachable("Invalid texture opcode"); 1267 } 1268 } 1269 1270 static inline nir_alu_type 1271 nir_tex_instr_src_type(nir_tex_instr *instr, unsigned src) 1272 { 1273 switch (instr->src[src].src_type) { 1274 case nir_tex_src_coord: 1275 switch (instr->op) { 1276 case nir_texop_txf: 1277 case nir_texop_txf_ms: 1278 case nir_texop_txf_ms_mcs: 1279 case nir_texop_samples_identical: 1280 return nir_type_int; 1281 1282 default: 1283 return nir_type_float; 1284 } 1285 1286 case nir_tex_src_lod: 1287 switch (instr->op) { 1288 case nir_texop_txs: 1289 case nir_texop_txf: 1290 return nir_type_int; 1291 1292 default: 1293 return nir_type_float; 1294 } 1295 1296 case nir_tex_src_projector: 1297 case nir_tex_src_comparator: 1298 case nir_tex_src_bias: 1299 case nir_tex_src_ddx: 1300 case nir_tex_src_ddy: 1301 return nir_type_float; 1302 1303 case nir_tex_src_offset: 1304 case nir_tex_src_ms_index: 1305 case nir_tex_src_texture_offset: 1306 case nir_tex_src_sampler_offset: 1307 return nir_type_int; 1308 1309 default: 1310 unreachable("Invalid texture source type"); 1311 } 1312 } 1313 1314 static inline unsigned 1315 nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src) 1316 { 1317 if (instr->src[src].src_type == nir_tex_src_coord) 1318 return instr->coord_components; 1319 1320 /* The MCS value is expected to be a vec4 returned by a txf_ms_mcs */ 1321 if (instr->src[src].src_type == nir_tex_src_ms_mcs) 1322 return 4; 1323 1324 if (instr->src[src].src_type == nir_tex_src_offset || 1325 instr->src[src].src_type == nir_tex_src_ddx || 1326 instr->src[src].src_type == nir_tex_src_ddy) { 1327 if (instr->is_array) 1328 return instr->coord_components - 1; 1329 else 1330 return instr->coord_components; 1331 } 1332 1333 return 1; 1334 } 1335 1336 static inline int 1337 nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type) 1338 { 1339 for (unsigned i = 0; i < instr->num_srcs; i++) 1340 if (instr->src[i].src_type == type) 1341 return (int) i; 1342 1343 return -1; 1344 } 1345 1346 void nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx); 1347 1348 typedef struct { 1349 nir_instr instr; 1350 1351 nir_const_value value; 1352 1353 nir_ssa_def def; 1354 } nir_load_const_instr; 1355 1356 typedef enum { 1357 nir_jump_return, 1358 nir_jump_break, 1359 nir_jump_continue, 1360 } nir_jump_type; 1361 1362 typedef struct { 1363 nir_instr instr; 1364 nir_jump_type type; 1365 } nir_jump_instr; 1366 1367 /* creates a new SSA variable in an undefined state */ 1368 1369 typedef struct { 1370 nir_instr instr; 1371 nir_ssa_def def; 1372 } nir_ssa_undef_instr; 1373 1374 typedef struct { 1375 struct exec_node node; 1376 1377 /* The predecessor block corresponding to this source */ 1378 struct nir_block *pred; 1379 1380 nir_src src; 1381 } nir_phi_src; 1382 1383 #define nir_foreach_phi_src(phi_src, phi) \ 1384 foreach_list_typed(nir_phi_src, phi_src, node, &(phi)->srcs) 1385 #define nir_foreach_phi_src_safe(phi_src, phi) \ 1386 foreach_list_typed_safe(nir_phi_src, phi_src, node, &(phi)->srcs) 1387 1388 typedef struct { 1389 nir_instr instr; 1390 1391 struct exec_list srcs; /** < list of nir_phi_src */ 1392 1393 nir_dest dest; 1394 } nir_phi_instr; 1395 1396 typedef struct { 1397 struct exec_node node; 1398 nir_src src; 1399 nir_dest dest; 1400 } nir_parallel_copy_entry; 1401 1402 #define nir_foreach_parallel_copy_entry(entry, pcopy) \ 1403 foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) 1404 1405 typedef struct { 1406 nir_instr instr; 1407 1408 /* A list of nir_parallel_copy_entry's. The sources of all of the 1409 * entries are copied to the corresponding destinations "in parallel". 1410 * In other words, if we have two entries: a -> b and b -> a, the values 1411 * get swapped. 1412 */ 1413 struct exec_list entries; 1414 } nir_parallel_copy_instr; 1415 1416 NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr, 1417 type, nir_instr_type_alu) 1418 NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr, 1419 type, nir_instr_type_call) 1420 NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr, 1421 type, nir_instr_type_jump) 1422 NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr, 1423 type, nir_instr_type_tex) 1424 NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr, 1425 type, nir_instr_type_intrinsic) 1426 NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr, 1427 type, nir_instr_type_load_const) 1428 NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr, 1429 type, nir_instr_type_ssa_undef) 1430 NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr, 1431 type, nir_instr_type_phi) 1432 NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, 1433 nir_parallel_copy_instr, instr, 1434 type, nir_instr_type_parallel_copy) 1435 1436 /* 1437 * Control flow 1438 * 1439 * Control flow consists of a tree of control flow nodes, which include 1440 * if-statements and loops. The leaves of the tree are basic blocks, lists of 1441 * instructions that always run start-to-finish. Each basic block also keeps 1442 * track of its successors (blocks which may run immediately after the current 1443 * block) and predecessors (blocks which could have run immediately before the 1444 * current block). Each function also has a start block and an end block which 1445 * all return statements point to (which is always empty). Together, all the 1446 * blocks with their predecessors and successors make up the control flow 1447 * graph (CFG) of the function. There are helpers that modify the tree of 1448 * control flow nodes while modifying the CFG appropriately; these should be 1449 * used instead of modifying the tree directly. 1450 */ 1451 1452 typedef enum { 1453 nir_cf_node_block, 1454 nir_cf_node_if, 1455 nir_cf_node_loop, 1456 nir_cf_node_function 1457 } nir_cf_node_type; 1458 1459 typedef struct nir_cf_node { 1460 struct exec_node node; 1461 nir_cf_node_type type; 1462 struct nir_cf_node *parent; 1463 } nir_cf_node; 1464 1465 typedef struct nir_block { 1466 nir_cf_node cf_node; 1467 1468 struct exec_list instr_list; /** < list of nir_instr */ 1469 1470 /** generic block index; generated by nir_index_blocks */ 1471 unsigned index; 1472 1473 /* 1474 * Each block can only have up to 2 successors, so we put them in a simple 1475 * array - no need for anything more complicated. 1476 */ 1477 struct nir_block *successors[2]; 1478 1479 /* Set of nir_block predecessors in the CFG */ 1480 struct set *predecessors; 1481 1482 /* 1483 * this node's immediate dominator in the dominance tree - set to NULL for 1484 * the start block. 1485 */ 1486 struct nir_block *imm_dom; 1487 1488 /* This node's children in the dominance tree */ 1489 unsigned num_dom_children; 1490 struct nir_block **dom_children; 1491 1492 /* Set of nir_block's on the dominance frontier of this block */ 1493 struct set *dom_frontier; 1494 1495 /* 1496 * These two indices have the property that dom_{pre,post}_index for each 1497 * child of this block in the dominance tree will always be between 1498 * dom_pre_index and dom_post_index for this block, which makes testing if 1499 * a given block is dominated by another block an O(1) operation. 1500 */ 1501 unsigned dom_pre_index, dom_post_index; 1502 1503 /* live in and out for this block; used for liveness analysis */ 1504 BITSET_WORD *live_in; 1505 BITSET_WORD *live_out; 1506 } nir_block; 1507 1508 static inline nir_instr * 1509 nir_block_first_instr(nir_block *block) 1510 { 1511 struct exec_node *head = exec_list_get_head(&block->instr_list); 1512 return exec_node_data(nir_instr, head, node); 1513 } 1514 1515 static inline nir_instr * 1516 nir_block_last_instr(nir_block *block) 1517 { 1518 struct exec_node *tail = exec_list_get_tail(&block->instr_list); 1519 return exec_node_data(nir_instr, tail, node); 1520 } 1521 1522 #define nir_foreach_instr(instr, block) \ 1523 foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) 1524 #define nir_foreach_instr_reverse(instr, block) \ 1525 foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) 1526 #define nir_foreach_instr_safe(instr, block) \ 1527 foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) 1528 #define nir_foreach_instr_reverse_safe(instr, block) \ 1529 foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) 1530 1531 typedef struct nir_if { 1532 nir_cf_node cf_node; 1533 nir_src condition; 1534 1535 struct exec_list then_list; /** < list of nir_cf_node */ 1536 struct exec_list else_list; /** < list of nir_cf_node */ 1537 } nir_if; 1538 1539 typedef struct { 1540 nir_if *nif; 1541 1542 nir_instr *conditional_instr; 1543 1544 nir_block *break_block; 1545 nir_block *continue_from_block; 1546 1547 bool continue_from_then; 1548 1549 struct list_head loop_terminator_link; 1550 } nir_loop_terminator; 1551 1552 typedef struct { 1553 /* Number of instructions in the loop */ 1554 unsigned num_instructions; 1555 1556 /* How many times the loop is run (if known) */ 1557 unsigned trip_count; 1558 bool is_trip_count_known; 1559 1560 /* Unroll the loop regardless of its size */ 1561 bool force_unroll; 1562 1563 nir_loop_terminator *limiting_terminator; 1564 1565 /* A list of loop_terminators terminating this loop. */ 1566 struct list_head loop_terminator_list; 1567 } nir_loop_info; 1568 1569 typedef struct { 1570 nir_cf_node cf_node; 1571 1572 struct exec_list body; /** < list of nir_cf_node */ 1573 1574 nir_loop_info *info; 1575 } nir_loop; 1576 1577 /** 1578 * Various bits of metadata that can may be created or required by 1579 * optimization and analysis passes 1580 */ 1581 typedef enum { 1582 nir_metadata_none = 0x0, 1583 nir_metadata_block_index = 0x1, 1584 nir_metadata_dominance = 0x2, 1585 nir_metadata_live_ssa_defs = 0x4, 1586 nir_metadata_not_properly_reset = 0x8, 1587 nir_metadata_loop_analysis = 0x10, 1588 } nir_metadata; 1589 1590 typedef struct { 1591 nir_cf_node cf_node; 1592 1593 /** pointer to the function of which this is an implementation */ 1594 struct nir_function *function; 1595 1596 struct exec_list body; /** < list of nir_cf_node */ 1597 1598 nir_block *end_block; 1599 1600 /** list for all local variables in the function */ 1601 struct exec_list locals; 1602 1603 /** array of variables used as parameters */ 1604 unsigned num_params; 1605 nir_variable **params; 1606 1607 /** variable used to hold the result of the function */ 1608 nir_variable *return_var; 1609 1610 /** list of local registers in the function */ 1611 struct exec_list registers; 1612 1613 /** next available local register index */ 1614 unsigned reg_alloc; 1615 1616 /** next available SSA value index */ 1617 unsigned ssa_alloc; 1618 1619 /* total number of basic blocks, only valid when block_index_dirty = false */ 1620 unsigned num_blocks; 1621 1622 nir_metadata valid_metadata; 1623 } nir_function_impl; 1624 1625 ATTRIBUTE_RETURNS_NONNULL static inline nir_block * 1626 nir_start_block(nir_function_impl *impl) 1627 { 1628 return (nir_block *) impl->body.head_sentinel.next; 1629 } 1630 1631 ATTRIBUTE_RETURNS_NONNULL static inline nir_block * 1632 nir_impl_last_block(nir_function_impl *impl) 1633 { 1634 return (nir_block *) impl->body.tail_sentinel.prev; 1635 } 1636 1637 static inline nir_cf_node * 1638 nir_cf_node_next(nir_cf_node *node) 1639 { 1640 struct exec_node *next = exec_node_get_next(&node->node); 1641 if (exec_node_is_tail_sentinel(next)) 1642 return NULL; 1643 else 1644 return exec_node_data(nir_cf_node, next, node); 1645 } 1646 1647 static inline nir_cf_node * 1648 nir_cf_node_prev(nir_cf_node *node) 1649 { 1650 struct exec_node *prev = exec_node_get_prev(&node->node); 1651 if (exec_node_is_head_sentinel(prev)) 1652 return NULL; 1653 else 1654 return exec_node_data(nir_cf_node, prev, node); 1655 } 1656 1657 static inline bool 1658 nir_cf_node_is_first(const nir_cf_node *node) 1659 { 1660 return exec_node_is_head_sentinel(node->node.prev); 1661 } 1662 1663 static inline bool 1664 nir_cf_node_is_last(const nir_cf_node *node) 1665 { 1666 return exec_node_is_tail_sentinel(node->node.next); 1667 } 1668 1669 NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node, 1670 type, nir_cf_node_block) 1671 NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node, 1672 type, nir_cf_node_if) 1673 NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node, 1674 type, nir_cf_node_loop) 1675 NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, 1676 nir_function_impl, cf_node, type, nir_cf_node_function) 1677 1678 static inline nir_block * 1679 nir_if_first_then_block(nir_if *if_stmt) 1680 { 1681 struct exec_node *head = exec_list_get_head(&if_stmt->then_list); 1682 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 1683 } 1684 1685 static inline nir_block * 1686 nir_if_last_then_block(nir_if *if_stmt) 1687 { 1688 struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); 1689 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 1690 } 1691 1692 static inline nir_block * 1693 nir_if_first_else_block(nir_if *if_stmt) 1694 { 1695 struct exec_node *head = exec_list_get_head(&if_stmt->else_list); 1696 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 1697 } 1698 1699 static inline nir_block * 1700 nir_if_last_else_block(nir_if *if_stmt) 1701 { 1702 struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); 1703 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 1704 } 1705 1706 static inline nir_block * 1707 nir_loop_first_block(nir_loop *loop) 1708 { 1709 struct exec_node *head = exec_list_get_head(&loop->body); 1710 return nir_cf_node_as_block(exec_node_data(nir_cf_node, head, node)); 1711 } 1712 1713 static inline nir_block * 1714 nir_loop_last_block(nir_loop *loop) 1715 { 1716 struct exec_node *tail = exec_list_get_tail(&loop->body); 1717 return nir_cf_node_as_block(exec_node_data(nir_cf_node, tail, node)); 1718 } 1719 1720 typedef enum { 1721 nir_parameter_in, 1722 nir_parameter_out, 1723 nir_parameter_inout, 1724 } nir_parameter_type; 1725 1726 typedef struct { 1727 nir_parameter_type param_type; 1728 const struct glsl_type *type; 1729 } nir_parameter; 1730 1731 typedef struct nir_function { 1732 struct exec_node node; 1733 1734 const char *name; 1735 struct nir_shader *shader; 1736 1737 unsigned num_params; 1738 nir_parameter *params; 1739 const struct glsl_type *return_type; 1740 1741 /** The implementation of this function. 1742 * 1743 * If the function is only declared and not implemented, this is NULL. 1744 */ 1745 nir_function_impl *impl; 1746 } nir_function; 1747 1748 typedef struct nir_shader_compiler_options { 1749 bool lower_fdiv; 1750 bool lower_ffma; 1751 bool fuse_ffma; 1752 bool lower_flrp32; 1753 /** Lowers flrp when it does not support doubles */ 1754 bool lower_flrp64; 1755 bool lower_fpow; 1756 bool lower_fsat; 1757 bool lower_fsqrt; 1758 bool lower_fmod32; 1759 bool lower_fmod64; 1760 bool lower_bitfield_extract; 1761 bool lower_bitfield_insert; 1762 bool lower_uadd_carry; 1763 bool lower_usub_borrow; 1764 /** lowers fneg and ineg to fsub and isub. */ 1765 bool lower_negate; 1766 /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ 1767 bool lower_sub; 1768 1769 /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ 1770 bool lower_scmp; 1771 1772 /** enables rules to lower idiv by power-of-two: */ 1773 bool lower_idiv; 1774 1775 /* Does the native fdot instruction replicate its result for four 1776 * components? If so, then opt_algebraic_late will turn all fdotN 1777 * instructions into fdot_replicatedN instructions. 1778 */ 1779 bool fdot_replicates; 1780 1781 /** lowers ffract to fsub+ffloor: */ 1782 bool lower_ffract; 1783 1784 bool lower_pack_half_2x16; 1785 bool lower_pack_unorm_2x16; 1786 bool lower_pack_snorm_2x16; 1787 bool lower_pack_unorm_4x8; 1788 bool lower_pack_snorm_4x8; 1789 bool lower_unpack_half_2x16; 1790 bool lower_unpack_unorm_2x16; 1791 bool lower_unpack_snorm_2x16; 1792 bool lower_unpack_unorm_4x8; 1793 bool lower_unpack_snorm_4x8; 1794 1795 bool lower_extract_byte; 1796 bool lower_extract_word; 1797 1798 /** 1799 * Does the driver support real 32-bit integers? (Otherwise, integers 1800 * are simulated by floats.) 1801 */ 1802 bool native_integers; 1803 1804 /* Indicates that the driver only has zero-based vertex id */ 1805 bool vertex_id_zero_based; 1806 1807 bool lower_cs_local_index_from_id; 1808 1809 /** 1810 * Should nir_lower_io() create load_interpolated_input intrinsics? 1811 * 1812 * If not, it generates regular load_input intrinsics and interpolation 1813 * information must be inferred from the list of input nir_variables. 1814 */ 1815 bool use_interpolated_input_intrinsics; 1816 1817 unsigned max_unroll_iterations; 1818 } nir_shader_compiler_options; 1819 1820 typedef struct nir_shader { 1821 /** list of uniforms (nir_variable) */ 1822 struct exec_list uniforms; 1823 1824 /** list of inputs (nir_variable) */ 1825 struct exec_list inputs; 1826 1827 /** list of outputs (nir_variable) */ 1828 struct exec_list outputs; 1829 1830 /** list of shared compute variables (nir_variable) */ 1831 struct exec_list shared; 1832 1833 /** Set of driver-specific options for the shader. 1834 * 1835 * The memory for the options is expected to be kept in a single static 1836 * copy by the driver. 1837 */ 1838 const struct nir_shader_compiler_options *options; 1839 1840 /** Various bits of compile-time information about a given shader */ 1841 struct shader_info *info; 1842 1843 /** list of global variables in the shader (nir_variable) */ 1844 struct exec_list globals; 1845 1846 /** list of system value variables in the shader (nir_variable) */ 1847 struct exec_list system_values; 1848 1849 struct exec_list functions; /** < list of nir_function */ 1850 1851 /** list of global register in the shader */ 1852 struct exec_list registers; 1853 1854 /** next available global register index */ 1855 unsigned reg_alloc; 1856 1857 /** 1858 * the highest index a load_input_*, load_uniform_*, etc. intrinsic can 1859 * access plus one 1860 */ 1861 unsigned num_inputs, num_uniforms, num_outputs, num_shared; 1862 1863 /** The shader stage, such as MESA_SHADER_VERTEX. */ 1864 gl_shader_stage stage; 1865 } nir_shader; 1866 1867 static inline nir_function_impl * 1868 nir_shader_get_entrypoint(nir_shader *shader) 1869 { 1870 assert(exec_list_length(&shader->functions) == 1); 1871 struct exec_node *func_node = exec_list_get_head(&shader->functions); 1872 nir_function *func = exec_node_data(nir_function, func_node, node); 1873 assert(func->return_type == glsl_void_type()); 1874 assert(func->num_params == 0); 1875 assert(func->impl); 1876 return func->impl; 1877 } 1878 1879 #define nir_foreach_function(func, shader) \ 1880 foreach_list_typed(nir_function, func, node, &(shader)->functions) 1881 1882 nir_shader *nir_shader_create(void *mem_ctx, 1883 gl_shader_stage stage, 1884 const nir_shader_compiler_options *options, 1885 shader_info *si); 1886 1887 /** creates a register, including assigning it an index and adding it to the list */ 1888 nir_register *nir_global_reg_create(nir_shader *shader); 1889 1890 nir_register *nir_local_reg_create(nir_function_impl *impl); 1891 1892 void nir_reg_remove(nir_register *reg); 1893 1894 /** Adds a variable to the appropreate list in nir_shader */ 1895 void nir_shader_add_variable(nir_shader *shader, nir_variable *var); 1896 1897 static inline void 1898 nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) 1899 { 1900 assert(var->data.mode == nir_var_local); 1901 exec_list_push_tail(&impl->locals, &var->node); 1902 } 1903 1904 /** creates a variable, sets a few defaults, and adds it to the list */ 1905 nir_variable *nir_variable_create(nir_shader *shader, 1906 nir_variable_mode mode, 1907 const struct glsl_type *type, 1908 const char *name); 1909 /** creates a local variable and adds it to the list */ 1910 nir_variable *nir_local_variable_create(nir_function_impl *impl, 1911 const struct glsl_type *type, 1912 const char *name); 1913 1914 /** creates a function and adds it to the shader's list of functions */ 1915 nir_function *nir_function_create(nir_shader *shader, const char *name); 1916 1917 nir_function_impl *nir_function_impl_create(nir_function *func); 1918 /** creates a function_impl that isn't tied to any particular function */ 1919 nir_function_impl *nir_function_impl_create_bare(nir_shader *shader); 1920 1921 nir_block *nir_block_create(nir_shader *shader); 1922 nir_if *nir_if_create(nir_shader *shader); 1923 nir_loop *nir_loop_create(nir_shader *shader); 1924 1925 nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); 1926 1927 /** requests that the given pieces of metadata be generated */ 1928 void nir_metadata_require(nir_function_impl *impl, nir_metadata required, ...); 1929 /** dirties all but the preserved metadata */ 1930 void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); 1931 1932 /** creates an instruction with default swizzle/writemask/etc. with NULL registers */ 1933 nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); 1934 1935 nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); 1936 1937 nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, 1938 unsigned num_components, 1939 unsigned bit_size); 1940 1941 nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, 1942 nir_intrinsic_op op); 1943 1944 nir_call_instr *nir_call_instr_create(nir_shader *shader, 1945 nir_function *callee); 1946 1947 nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); 1948 1949 nir_phi_instr *nir_phi_instr_create(nir_shader *shader); 1950 1951 nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); 1952 1953 nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, 1954 unsigned num_components, 1955 unsigned bit_size); 1956 1957 nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var); 1958 nir_deref_array *nir_deref_array_create(void *mem_ctx); 1959 nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index); 1960 1961 typedef bool (*nir_deref_foreach_leaf_cb)(nir_deref_var *deref, void *state); 1962 bool nir_deref_foreach_leaf(nir_deref_var *deref, 1963 nir_deref_foreach_leaf_cb cb, void *state); 1964 1965 nir_load_const_instr * 1966 nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref); 1967 1968 /** 1969 * NIR Cursors and Instruction Insertion API 1970 * @{ 1971 * 1972 * A tiny struct representing a point to insert/extract instructions or 1973 * control flow nodes. Helps reduce the combinatorial explosion of possible 1974 * points to insert/extract. 1975 * 1976 * \sa nir_control_flow.h 1977 */ 1978 typedef enum { 1979 nir_cursor_before_block, 1980 nir_cursor_after_block, 1981 nir_cursor_before_instr, 1982 nir_cursor_after_instr, 1983 } nir_cursor_option; 1984 1985 typedef struct { 1986 nir_cursor_option option; 1987 union { 1988 nir_block *block; 1989 nir_instr *instr; 1990 }; 1991 } nir_cursor; 1992 1993 static inline nir_block * 1994 nir_cursor_current_block(nir_cursor cursor) 1995 { 1996 if (cursor.option == nir_cursor_before_instr || 1997 cursor.option == nir_cursor_after_instr) { 1998 return cursor.instr->block; 1999 } else { 2000 return cursor.block; 2001 } 2002 } 2003 2004 bool nir_cursors_equal(nir_cursor a, nir_cursor b); 2005 2006 static inline nir_cursor 2007 nir_before_block(nir_block *block) 2008 { 2009 nir_cursor cursor; 2010 cursor.option = nir_cursor_before_block; 2011 cursor.block = block; 2012 return cursor; 2013 } 2014 2015 static inline nir_cursor 2016 nir_after_block(nir_block *block) 2017 { 2018 nir_cursor cursor; 2019 cursor.option = nir_cursor_after_block; 2020 cursor.block = block; 2021 return cursor; 2022 } 2023 2024 static inline nir_cursor 2025 nir_before_instr(nir_instr *instr) 2026 { 2027 nir_cursor cursor; 2028 cursor.option = nir_cursor_before_instr; 2029 cursor.instr = instr; 2030 return cursor; 2031 } 2032 2033 static inline nir_cursor 2034 nir_after_instr(nir_instr *instr) 2035 { 2036 nir_cursor cursor; 2037 cursor.option = nir_cursor_after_instr; 2038 cursor.instr = instr; 2039 return cursor; 2040 } 2041 2042 static inline nir_cursor 2043 nir_after_block_before_jump(nir_block *block) 2044 { 2045 nir_instr *last_instr = nir_block_last_instr(block); 2046 if (last_instr && last_instr->type == nir_instr_type_jump) { 2047 return nir_before_instr(last_instr); 2048 } else { 2049 return nir_after_block(block); 2050 } 2051 } 2052 2053 static inline nir_cursor 2054 nir_before_cf_node(nir_cf_node *node) 2055 { 2056 if (node->type == nir_cf_node_block) 2057 return nir_before_block(nir_cf_node_as_block(node)); 2058 2059 return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); 2060 } 2061 2062 static inline nir_cursor 2063 nir_after_cf_node(nir_cf_node *node) 2064 { 2065 if (node->type == nir_cf_node_block) 2066 return nir_after_block(nir_cf_node_as_block(node)); 2067 2068 return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); 2069 } 2070 2071 static inline nir_cursor 2072 nir_after_phis(nir_block *block) 2073 { 2074 nir_foreach_instr(instr, block) { 2075 if (instr->type != nir_instr_type_phi) 2076 return nir_before_instr(instr); 2077 } 2078 return nir_after_block(block); 2079 } 2080 2081 static inline nir_cursor 2082 nir_after_cf_node_and_phis(nir_cf_node *node) 2083 { 2084 if (node->type == nir_cf_node_block) 2085 return nir_after_block(nir_cf_node_as_block(node)); 2086 2087 nir_block *block = nir_cf_node_as_block(nir_cf_node_next(node)); 2088 2089 return nir_after_phis(block); 2090 } 2091 2092 static inline nir_cursor 2093 nir_before_cf_list(struct exec_list *cf_list) 2094 { 2095 nir_cf_node *first_node = exec_node_data(nir_cf_node, 2096 exec_list_get_head(cf_list), node); 2097 return nir_before_cf_node(first_node); 2098 } 2099 2100 static inline nir_cursor 2101 nir_after_cf_list(struct exec_list *cf_list) 2102 { 2103 nir_cf_node *last_node = exec_node_data(nir_cf_node, 2104 exec_list_get_tail(cf_list), node); 2105 return nir_after_cf_node(last_node); 2106 } 2107 2108 /** 2109 * Insert a NIR instruction at the given cursor. 2110 * 2111 * Note: This does not update the cursor. 2112 */ 2113 void nir_instr_insert(nir_cursor cursor, nir_instr *instr); 2114 2115 static inline void 2116 nir_instr_insert_before(nir_instr *instr, nir_instr *before) 2117 { 2118 nir_instr_insert(nir_before_instr(instr), before); 2119 } 2120 2121 static inline void 2122 nir_instr_insert_after(nir_instr *instr, nir_instr *after) 2123 { 2124 nir_instr_insert(nir_after_instr(instr), after); 2125 } 2126 2127 static inline void 2128 nir_instr_insert_before_block(nir_block *block, nir_instr *before) 2129 { 2130 nir_instr_insert(nir_before_block(block), before); 2131 } 2132 2133 static inline void 2134 nir_instr_insert_after_block(nir_block *block, nir_instr *after) 2135 { 2136 nir_instr_insert(nir_after_block(block), after); 2137 } 2138 2139 static inline void 2140 nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) 2141 { 2142 nir_instr_insert(nir_before_cf_node(node), before); 2143 } 2144 2145 static inline void 2146 nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) 2147 { 2148 nir_instr_insert(nir_after_cf_node(node), after); 2149 } 2150 2151 static inline void 2152 nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) 2153 { 2154 nir_instr_insert(nir_before_cf_list(list), before); 2155 } 2156 2157 static inline void 2158 nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) 2159 { 2160 nir_instr_insert(nir_after_cf_list(list), after); 2161 } 2162 2163 void nir_instr_remove(nir_instr *instr); 2164 2165 /** @} */ 2166 2167 typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); 2168 typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); 2169 typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); 2170 bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, 2171 void *state); 2172 bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); 2173 bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); 2174 2175 nir_const_value *nir_src_as_const_value(nir_src src); 2176 bool nir_src_is_dynamically_uniform(nir_src src); 2177 bool nir_srcs_equal(nir_src src1, nir_src src2); 2178 void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); 2179 void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); 2180 void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); 2181 void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, 2182 nir_dest new_dest); 2183 2184 void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, 2185 unsigned num_components, unsigned bit_size, 2186 const char *name); 2187 void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, 2188 unsigned num_components, unsigned bit_size, 2189 const char *name); 2190 void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); 2191 void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, 2192 nir_instr *after_me); 2193 2194 uint8_t nir_ssa_def_components_read(nir_ssa_def *def); 2195 2196 /* 2197 * finds the next basic block in source-code order, returns NULL if there is 2198 * none 2199 */ 2200 2201 nir_block *nir_block_cf_tree_next(nir_block *block); 2202 2203 /* Performs the opposite of nir_block_cf_tree_next() */ 2204 2205 nir_block *nir_block_cf_tree_prev(nir_block *block); 2206 2207 /* Gets the first block in a CF node in source-code order */ 2208 2209 nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node); 2210 2211 /* Gets the last block in a CF node in source-code order */ 2212 2213 nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node); 2214 2215 /* Gets the next block after a CF node in source-code order */ 2216 2217 nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node); 2218 2219 /* Macros for loops that visit blocks in source-code order */ 2220 2221 #define nir_foreach_block(block, impl) \ 2222 for (nir_block *block = nir_start_block(impl); block != NULL; \ 2223 block = nir_block_cf_tree_next(block)) 2224 2225 #define nir_foreach_block_safe(block, impl) \ 2226 for (nir_block *block = nir_start_block(impl), \ 2227 *next = nir_block_cf_tree_next(block); \ 2228 block != NULL; \ 2229 block = next, next = nir_block_cf_tree_next(block)) 2230 2231 #define nir_foreach_block_reverse(block, impl) \ 2232 for (nir_block *block = nir_impl_last_block(impl); block != NULL; \ 2233 block = nir_block_cf_tree_prev(block)) 2234 2235 #define nir_foreach_block_reverse_safe(block, impl) \ 2236 for (nir_block *block = nir_impl_last_block(impl), \ 2237 *prev = nir_block_cf_tree_prev(block); \ 2238 block != NULL; \ 2239 block = prev, prev = nir_block_cf_tree_prev(block)) 2240 2241 #define nir_foreach_block_in_cf_node(block, node) \ 2242 for (nir_block *block = nir_cf_node_cf_tree_first(node); \ 2243 block != nir_cf_node_cf_tree_next(node); \ 2244 block = nir_block_cf_tree_next(block)) 2245 2246 /* If the following CF node is an if, this function returns that if. 2247 * Otherwise, it returns NULL. 2248 */ 2249 nir_if *nir_block_get_following_if(nir_block *block); 2250 2251 nir_loop *nir_block_get_following_loop(nir_block *block); 2252 2253 void nir_index_local_regs(nir_function_impl *impl); 2254 void nir_index_global_regs(nir_shader *shader); 2255 void nir_index_ssa_defs(nir_function_impl *impl); 2256 unsigned nir_index_instrs(nir_function_impl *impl); 2257 2258 void nir_index_blocks(nir_function_impl *impl); 2259 2260 void nir_print_shader(nir_shader *shader, FILE *fp); 2261 void nir_print_shader_annotated(nir_shader *shader, FILE *fp, struct hash_table *errors); 2262 void nir_print_instr(const nir_instr *instr, FILE *fp); 2263 2264 nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s); 2265 nir_function_impl *nir_function_impl_clone(const nir_function_impl *fi); 2266 nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var); 2267 nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader); 2268 nir_deref *nir_deref_clone(const nir_deref *deref, void *mem_ctx); 2269 nir_deref_var *nir_deref_var_clone(const nir_deref_var *deref, void *mem_ctx); 2270 2271 #ifdef DEBUG 2272 void nir_validate_shader(nir_shader *shader); 2273 void nir_metadata_set_validation_flag(nir_shader *shader); 2274 void nir_metadata_check_validation_flag(nir_shader *shader); 2275 2276 #include "util/debug.h" 2277 static inline bool 2278 should_clone_nir(void) 2279 { 2280 static int should_clone = -1; 2281 if (should_clone < 0) 2282 should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); 2283 2284 return should_clone; 2285 } 2286 #else 2287 static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } 2288 static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } 2289 static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } 2290 static inline bool should_clone_nir(void) { return false; } 2291 #endif /* DEBUG */ 2292 2293 #define _PASS(nir, do_pass) do { \ 2294 do_pass \ 2295 nir_validate_shader(nir); \ 2296 if (should_clone_nir()) { \ 2297 nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ 2298 ralloc_free(nir); \ 2299 nir = clone; \ 2300 } \ 2301 } while (0) 2302 2303 #define NIR_PASS(progress, nir, pass, ...) _PASS(nir, \ 2304 nir_metadata_set_validation_flag(nir); \ 2305 if (pass(nir, ##__VA_ARGS__)) { \ 2306 progress = true; \ 2307 nir_metadata_check_validation_flag(nir); \ 2308 } \ 2309 ) 2310 2311 #define NIR_PASS_V(nir, pass, ...) _PASS(nir, \ 2312 pass(nir, ##__VA_ARGS__); \ 2313 ) 2314 2315 void nir_calc_dominance_impl(nir_function_impl *impl); 2316 void nir_calc_dominance(nir_shader *shader); 2317 2318 nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); 2319 bool nir_block_dominates(nir_block *parent, nir_block *child); 2320 2321 void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); 2322 void nir_dump_dom_tree(nir_shader *shader, FILE *fp); 2323 2324 void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); 2325 void nir_dump_dom_frontier(nir_shader *shader, FILE *fp); 2326 2327 void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); 2328 void nir_dump_cfg(nir_shader *shader, FILE *fp); 2329 2330 int nir_gs_count_vertices(const nir_shader *shader); 2331 2332 bool nir_split_var_copies(nir_shader *shader); 2333 2334 bool nir_lower_returns_impl(nir_function_impl *impl); 2335 bool nir_lower_returns(nir_shader *shader); 2336 2337 bool nir_inline_functions(nir_shader *shader); 2338 2339 bool nir_propagate_invariant(nir_shader *shader); 2340 2341 void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, nir_shader *shader); 2342 void nir_lower_var_copies(nir_shader *shader); 2343 2344 bool nir_lower_global_vars_to_local(nir_shader *shader); 2345 2346 bool nir_lower_indirect_derefs(nir_shader *shader, nir_variable_mode modes); 2347 2348 bool nir_lower_locals_to_regs(nir_shader *shader); 2349 2350 void nir_lower_io_to_temporaries(nir_shader *shader, 2351 nir_function_impl *entrypoint, 2352 bool outputs, bool inputs); 2353 2354 void nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint); 2355 2356 void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, 2357 int (*type_size)(const struct glsl_type *)); 2358 2359 typedef enum { 2360 /* If set, this forces all non-flat fragment shader inputs to be 2361 * interpolated as if with the "sample" qualifier. This requires 2362 * nir_shader_compiler_options::use_interpolated_input_intrinsics. 2363 */ 2364 nir_lower_io_force_sample_interpolation = (1 << 1), 2365 } nir_lower_io_options; 2366 void nir_lower_io(nir_shader *shader, 2367 nir_variable_mode modes, 2368 int (*type_size)(const struct glsl_type *), 2369 nir_lower_io_options); 2370 nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); 2371 nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); 2372 2373 bool nir_is_per_vertex_io(nir_variable *var, gl_shader_stage stage); 2374 2375 void nir_lower_io_types(nir_shader *shader); 2376 void nir_lower_regs_to_ssa_impl(nir_function_impl *impl); 2377 void nir_lower_regs_to_ssa(nir_shader *shader); 2378 void nir_lower_vars_to_ssa(nir_shader *shader); 2379 2380 bool nir_remove_dead_variables(nir_shader *shader, nir_variable_mode modes); 2381 bool nir_lower_constant_initializers(nir_shader *shader, 2382 nir_variable_mode modes); 2383 2384 void nir_move_vec_src_uses_to_dest(nir_shader *shader); 2385 bool nir_lower_vec_to_movs(nir_shader *shader); 2386 bool nir_lower_alu_to_scalar(nir_shader *shader); 2387 void nir_lower_load_const_to_scalar(nir_shader *shader); 2388 2389 bool nir_lower_phis_to_scalar(nir_shader *shader); 2390 void nir_lower_io_to_scalar(nir_shader *shader, nir_variable_mode mask); 2391 2392 void nir_lower_samplers(nir_shader *shader, 2393 const struct gl_shader_program *shader_program); 2394 2395 bool nir_lower_system_values(nir_shader *shader); 2396 2397 typedef struct nir_lower_tex_options { 2398 /** 2399 * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which 2400 * sampler types a texture projector is lowered. 2401 */ 2402 unsigned lower_txp; 2403 2404 /** 2405 * If true, lower away nir_tex_src_offset for all texelfetch instructions. 2406 */ 2407 bool lower_txf_offset; 2408 2409 /** 2410 * If true, lower away nir_tex_src_offset for all rect textures. 2411 */ 2412 bool lower_rect_offset; 2413 2414 /** 2415 * If true, lower rect textures to 2D, using txs to fetch the 2416 * texture dimensions and dividing the texture coords by the 2417 * texture dims to normalize. 2418 */ 2419 bool lower_rect; 2420 2421 /** 2422 * If true, convert yuv to rgb. 2423 */ 2424 unsigned lower_y_uv_external; 2425 unsigned lower_y_u_v_external; 2426 unsigned lower_yx_xuxv_external; 2427 2428 /** 2429 * To emulate certain texture wrap modes, this can be used 2430 * to saturate the specified tex coord to [0.0, 1.0]. The 2431 * bits are according to sampler #, ie. if, for example: 2432 * 2433 * (conf->saturate_s & (1 << n)) 2434 * 2435 * is true, then the s coord for sampler n is saturated. 2436 * 2437 * Note that clamping must happen *after* projector lowering 2438 * so any projected texture sample instruction with a clamped 2439 * coordinate gets automatically lowered, regardless of the 2440 * 'lower_txp' setting. 2441 */ 2442 unsigned saturate_s; 2443 unsigned saturate_t; 2444 unsigned saturate_r; 2445 2446 /* Bitmask of textures that need swizzling. 2447 * 2448 * If (swizzle_result & (1 << texture_index)), then the swizzle in 2449 * swizzles[texture_index] is applied to the result of the texturing 2450 * operation. 2451 */ 2452 unsigned swizzle_result; 2453 2454 /* A swizzle for each texture. Values 0-3 represent x, y, z, or w swizzles 2455 * while 4 and 5 represent 0 and 1 respectively. 2456 */ 2457 uint8_t swizzles[32][4]; 2458 2459 /** 2460 * Bitmap of textures that need srgb to linear conversion. If 2461 * (lower_srgb & (1 << texture_index)) then the rgb (xyz) components 2462 * of the texture are lowered to linear. 2463 */ 2464 unsigned lower_srgb; 2465 2466 /** 2467 * If true, lower nir_texop_txd on cube maps with nir_texop_txl. 2468 */ 2469 bool lower_txd_cube_map; 2470 2471 /** 2472 * If true, lower nir_texop_txd on shadow samplers (except cube maps) 2473 * with nir_texop_txl. Notice that cube map shadow samplers are lowered 2474 * with lower_txd_cube_map. 2475 */ 2476 bool lower_txd_shadow; 2477 } nir_lower_tex_options; 2478 2479 bool nir_lower_tex(nir_shader *shader, 2480 const nir_lower_tex_options *options); 2481 2482 bool nir_lower_idiv(nir_shader *shader); 2483 2484 void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables); 2485 void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); 2486 void nir_lower_clip_cull_distance_arrays(nir_shader *nir); 2487 2488 void nir_lower_two_sided_color(nir_shader *shader); 2489 2490 void nir_lower_clamp_color_outputs(nir_shader *shader); 2491 2492 void nir_lower_passthrough_edgeflags(nir_shader *shader); 2493 void nir_lower_tes_patch_vertices(nir_shader *tes, unsigned patch_vertices); 2494 2495 typedef struct nir_lower_wpos_ytransform_options { 2496 int state_tokens[5]; 2497 bool fs_coord_origin_upper_left :1; 2498 bool fs_coord_origin_lower_left :1; 2499 bool fs_coord_pixel_center_integer :1; 2500 bool fs_coord_pixel_center_half_integer :1; 2501 } nir_lower_wpos_ytransform_options; 2502 2503 bool nir_lower_wpos_ytransform(nir_shader *shader, 2504 const nir_lower_wpos_ytransform_options *options); 2505 bool nir_lower_wpos_center(nir_shader *shader); 2506 2507 typedef struct nir_lower_drawpixels_options { 2508 int texcoord_state_tokens[5]; 2509 int scale_state_tokens[5]; 2510 int bias_state_tokens[5]; 2511 unsigned drawpix_sampler; 2512 unsigned pixelmap_sampler; 2513 bool pixel_maps :1; 2514 bool scale_and_bias :1; 2515 } nir_lower_drawpixels_options; 2516 2517 void nir_lower_drawpixels(nir_shader *shader, 2518 const nir_lower_drawpixels_options *options); 2519 2520 typedef struct nir_lower_bitmap_options { 2521 unsigned sampler; 2522 bool swizzle_xxxx; 2523 } nir_lower_bitmap_options; 2524 2525 void nir_lower_bitmap(nir_shader *shader, const nir_lower_bitmap_options *options); 2526 2527 void nir_lower_atomics(nir_shader *shader, 2528 const struct gl_shader_program *shader_program); 2529 void nir_lower_to_source_mods(nir_shader *shader); 2530 2531 bool nir_lower_gs_intrinsics(nir_shader *shader); 2532 2533 typedef enum { 2534 nir_lower_drcp = (1 << 0), 2535 nir_lower_dsqrt = (1 << 1), 2536 nir_lower_drsq = (1 << 2), 2537 nir_lower_dtrunc = (1 << 3), 2538 nir_lower_dfloor = (1 << 4), 2539 nir_lower_dceil = (1 << 5), 2540 nir_lower_dfract = (1 << 6), 2541 nir_lower_dround_even = (1 << 7), 2542 nir_lower_dmod = (1 << 8) 2543 } nir_lower_doubles_options; 2544 2545 void nir_lower_doubles(nir_shader *shader, nir_lower_doubles_options options); 2546 void nir_lower_double_pack(nir_shader *shader); 2547 2548 bool nir_normalize_cubemap_coords(nir_shader *shader); 2549 2550 void nir_live_ssa_defs_impl(nir_function_impl *impl); 2551 2552 void nir_loop_analyze_impl(nir_function_impl *impl, 2553 nir_variable_mode indirect_mask); 2554 2555 bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); 2556 2557 bool nir_repair_ssa_impl(nir_function_impl *impl); 2558 bool nir_repair_ssa(nir_shader *shader); 2559 2560 void nir_convert_loop_to_lcssa(nir_loop *loop); 2561 2562 /* If phi_webs_only is true, only convert SSA values involved in phi nodes to 2563 * registers. If false, convert all values (even those not involved in a phi 2564 * node) to registers. 2565 */ 2566 void nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); 2567 2568 bool nir_lower_phis_to_regs_block(nir_block *block); 2569 bool nir_lower_ssa_defs_to_regs_block(nir_block *block); 2570 2571 bool nir_opt_algebraic(nir_shader *shader); 2572 bool nir_opt_algebraic_late(nir_shader *shader); 2573 bool nir_opt_constant_folding(nir_shader *shader); 2574 2575 bool nir_opt_global_to_local(nir_shader *shader); 2576 2577 bool nir_copy_prop(nir_shader *shader); 2578 2579 bool nir_opt_copy_prop_vars(nir_shader *shader); 2580 2581 bool nir_opt_cse(nir_shader *shader); 2582 2583 bool nir_opt_dce(nir_shader *shader); 2584 2585 bool nir_opt_dead_cf(nir_shader *shader); 2586 2587 bool nir_opt_gcm(nir_shader *shader, bool value_number); 2588 2589 bool nir_opt_if(nir_shader *shader); 2590 2591 bool nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask); 2592 2593 bool nir_opt_move_comparisons(nir_shader *shader); 2594 2595 bool nir_opt_peephole_select(nir_shader *shader, unsigned limit); 2596 2597 bool nir_opt_remove_phis(nir_shader *shader); 2598 2599 bool nir_opt_trivial_continues(nir_shader *shader); 2600 2601 bool nir_opt_undef(nir_shader *shader); 2602 2603 bool nir_opt_conditional_discard(nir_shader *shader); 2604 2605 void nir_sweep(nir_shader *shader); 2606 2607 nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); 2608 gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); 2609 2610 #ifdef __cplusplus 2611 } /* extern "C" */ 2612 #endif 2613