/*
 * Copyright 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0 (at) gmail.com)
 *
 */

#include "glsl_to_nir.h"
#include "ir_visitor.h"
#include "ir_hierarchical_visitor.h"
#include "ir.h"
#include "compiler/nir/nir_control_flow.h"
#include "compiler/nir/nir_builder.h"
#include "main/imports.h"

/*
 * pass to lower GLSL IR to NIR
 *
 * This will lower variable dereferences to loads/stores of corresponding
 * variables in NIR - the variables will be converted to registers in a later
 * pass.
 */

namespace {

/*
 * Main translation visitor: walks the linked GLSL IR and emits equivalent
 * NIR variables and instructions into the nir_shader handed to the
 * constructor.  Instructions are inserted through the member nir_builder,
 * whose cursor is positioned by the control-flow visitors.
 */
class nir_visitor : public ir_visitor
{
public:
   nir_visitor(nir_shader *shader);
   ~nir_visitor();

   virtual void visit(ir_variable *);
   virtual void visit(ir_function *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_if *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_return *);
   virtual void visit(ir_call *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_barrier *);

   void create_function(ir_function_signature *ir);

private:
   void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
   nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);

   /* Convenience overloads for emitting an ALU instruction with 1-3 sources
    * (or an array of sources) and an SSA destination of dest_size components.
    */
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2, nir_ssa_def *src3);

   /* copied from shader->options->native_integers at construction */
   bool supports_ints;

   nir_shader *shader;
   nir_function_impl *impl;
   nir_builder b;
   nir_ssa_def *result; /* result of the expression tree last visited */

   nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir);

   /* the head of the dereference chain we're creating */
   nir_deref_var *deref_head;
   /* the tail of the dereference chain we're creating */
   nir_deref *deref_tail;

   nir_variable *var; /* variable created by ir_variable visitor */

   /* whether the IR we're operating on is per-function or global */
   bool is_global;

   /* map of ir_variable -> nir_variable */
   struct hash_table *var_table;

   /* map of ir_function_signature -> nir_function_overload */
   struct hash_table *overload_table;
};

/*
 * This visitor runs before the main visitor, calling create_function() for
 * each function so that the main visitor can resolve forward references in
 * calls.
 */

class nir_function_visitor : public ir_hierarchical_visitor
{
public:
   nir_function_visitor(nir_visitor *v) : visitor(v)
   {
   }
   virtual ir_visitor_status visit_enter(ir_function *);

private:
   nir_visitor *visitor;
};

} /* end of anonymous namespace */

/* Shift each input's location upward by the number of dual-slot (double)
 * inputs that precede it, so that inputs needing two slots really occupy
 * two consecutive locations after the remap.
 */
static void
nir_remap_attributes(nir_shader *shader)
{
   nir_foreach_variable(var, &shader->inputs) {
      var->data.location += _mesa_bitcount_64(shader->info.double_inputs_read &
                                              BITFIELD64_MASK(var->data.location));
   }

   /* Once the remap is done, reset double_inputs_read, so later it will have
    * which location/slots are doubles */
   shader->info.double_inputs_read = 0;
}

/* Entry point of the pass: translate the linked GLSL IR for one shader
 * stage into a newly created nir_shader.  A pre-pass (nir_function_visitor)
 * registers every function first so the main visitor can resolve forward
 * references in calls; constant initializers are then lowered for all
 * variable modes, and vertex-shader input locations are remapped to slots.
 */
nir_shader *
glsl_to_nir(const struct gl_shader_program *shader_prog,
            gl_shader_stage stage,
            const nir_shader_compiler_options *options)
{
   struct gl_linked_shader *sh = shader_prog->_LinkedShaders[stage];

   nir_shader *shader = nir_shader_create(NULL, stage, options,
                                          &sh->Program->info);

   nir_visitor v1(shader);
   nir_function_visitor v2(&v1);
   v2.run(sh->ir);
   visit_exec_list(sh->ir, &v1);

   nir_lower_constant_initializers(shader, (nir_variable_mode)~0);

   /* Remap the locations to slots so those requiring two slots will occupy
    * two locations. For instance, if we have in the IR code a dvec3 attr0 in
    * location 0 and vec4 attr1 in location 1, in NIR attr0 will use
    * locations/slots 0 and 1, and attr1 will use location/slot 2 */
   if (shader->info.stage == MESA_SHADER_VERTEX)
      nir_remap_attributes(shader);

   shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
   if (shader_prog->Label)
      shader->info.label = ralloc_strdup(shader, shader_prog->Label);

   /* Check for transform feedback varyings specified via the API */
   shader->info.has_transform_feedback_varyings =
      shader_prog->TransformFeedback.NumVarying > 0;

   /* Check for transform feedback varyings specified in the Shader */
   if (shader_prog->last_vert_prog)
      shader->info.has_transform_feedback_varyings |=
         shader_prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;

   return shader;
}

/* The two hash tables are created with a NULL ralloc context and are
 * explicitly destroyed in the destructor; everything else starts out
 * zeroed/NULL and is filled in as translation proceeds.
 */
nir_visitor::nir_visitor(nir_shader *shader)
{
   this->supports_ints = shader->options->native_integers;
   this->shader = shader;
   this->is_global = true;
   this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);
   this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                  _mesa_key_pointer_equal);
   this->result = NULL;
   this->impl = NULL;
   this->var = NULL;
   this->deref_head = NULL;
   this->deref_tail = NULL;
   memset(&this->b, 0, sizeof(this->b));
}

nir_visitor::~nir_visitor()
{
   _mesa_hash_table_destroy(this->var_table, NULL);
   _mesa_hash_table_destroy(this->overload_table, NULL);
}

/* Visit 'ir' (expected to produce a dereference chain in deref_head) and
 * return that chain, re-parented onto mem_ctx with ralloc_steal so its
 * lifetime follows the instruction that uses it.
 */
nir_deref_var *
nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir)
{
   ir->accept(this);
   ralloc_steal(mem_ctx, this->deref_head);
   return this->deref_head;
}

/* Recursively deep-copy an ir_constant into a nir_constant allocated out of
 * mem_ctx.  Returns NULL when ir is NULL (e.g. no initializer present).
 * Scalar/vector values are copied per column/row; structs and arrays recurse
 * into their elements.
 */
static nir_constant *
constant_copy(ir_constant *ir, void *mem_ctx)
{
   if (ir == NULL)
      return NULL;

   nir_constant *ret = rzalloc(mem_ctx, nir_constant);

   const unsigned rows = ir->type->vector_elements;
   const unsigned cols = ir->type->matrix_columns;
   unsigned i;

   ret->num_elements = 0;
   switch (ir->type->base_type) {
   case GLSL_TYPE_UINT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[0].u32[r] = ir->value.u[r];

      break;

   case GLSL_TYPE_INT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[0].i32[r] = ir->value.i[r];

      break;

   case GLSL_TYPE_FLOAT:
      for (unsigned c = 0; c < cols; c++) {
         for (unsigned r = 0; r < rows; r++)
            ret->values[c].f32[r] = ir->value.f[c * rows + r];
      }
      break;

   case GLSL_TYPE_DOUBLE:
      for (unsigned c = 0; c < cols; c++) {
         for (unsigned r = 0; r < rows; r++)
            ret->values[c].f64[r] = ir->value.d[c * rows + r];
      }
      break;

   case GLSL_TYPE_UINT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[0].u64[r] = ir->value.u64[r];
      break;

   case GLSL_TYPE_INT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[0].i64[r] = ir->value.i64[r];
      break;

   case GLSL_TYPE_BOOL:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      /* NIR booleans are canonicalized to NIR_TRUE/NIR_FALSE */
      for (unsigned r = 0; r < rows; r++)
         ret->values[0].u32[r] = ir->value.b[r] ? NIR_TRUE : NIR_FALSE;

      break;

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_ARRAY:
      ret->elements = ralloc_array(mem_ctx, nir_constant *,
                                   ir->type->length);
      ret->num_elements = ir->type->length;

      for (i = 0; i < ir->type->length; i++)
         ret->elements[i] = constant_copy(ir->const_elements[i], mem_ctx);
      break;

   default:
      unreachable("not reached");
   }

   return ret;
}

/* Translate an ir_variable into a nir_variable: copy over the type, name
 * and all data bits, map the GLSL IR mode onto the NIR mode (promoting a
 * few special fragment/geometry inputs to system values), attach it to the
 * current function impl or the shader, and record the ir -> nir mapping in
 * var_table for later dereference lookups.
 */
void
nir_visitor::visit(ir_variable *ir)
{
   /* TODO: In future we should switch to using the NIR lowering pass but for
    * now just ignore these variables as GLSL IR should have lowered them.
    * Anything remaining are just dead vars that weren't cleaned up.
    */
   if (ir->data.mode == ir_var_shader_shared)
      return;

   nir_variable *var = rzalloc(shader, nir_variable);
   var->type = ir->type;
   var->name = ralloc_strdup(var, ir->name);

   var->data.always_active_io = ir->data.always_active_io;
   var->data.read_only = ir->data.read_only;
   var->data.centroid = ir->data.centroid;
   var->data.sample = ir->data.sample;
   var->data.patch = ir->data.patch;
   var->data.invariant = ir->data.invariant;
   var->data.location = ir->data.location;
   var->data.stream = ir->data.stream;
   var->data.compact = false;

   switch(ir->data.mode) {
   case ir_var_auto:
   case ir_var_temporary:
      if (is_global)
         var->data.mode = nir_var_global;
      else
         var->data.mode = nir_var_local;
      break;

   case ir_var_function_in:
   case ir_var_function_out:
   case ir_var_function_inout:
   case ir_var_const_in:
      var->data.mode = nir_var_local;
      break;

   case ir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_FRAGMENT &&
          ir->data.location == VARYING_SLOT_FACE) {
         /* For whatever reason, GLSL IR makes gl_FrontFacing an input */
         var->data.location = SYSTEM_VALUE_FRONT_FACE;
         var->data.mode = nir_var_system_value;
      } else if (shader->info.stage == MESA_SHADER_GEOMETRY &&
                 ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
         /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
         var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
         var->data.mode = nir_var_system_value;
      } else {
         var->data.mode = nir_var_shader_in;

         /* Scalar-array tess levels are stored "compactly" in NIR */
         if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
             (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
              ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
            var->data.compact = ir->type->without_array()->is_scalar();
         }
      }

      /* Mark all the locations that require two slots */
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         for (uint i = 0; i < glsl_count_attribute_slots(var->type, true); i++) {
            uint64_t bitfield = BITFIELD64_BIT(var->data.location + i);
            shader->info.double_inputs_read |= bitfield;
         }
      }
      break;

   case ir_var_shader_out:
      var->data.mode = nir_var_shader_out;
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
           ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
         var->data.compact = ir->type->without_array()->is_scalar();
      }
      break;

   case ir_var_uniform:
      var->data.mode = nir_var_uniform;
      break;

   case ir_var_shader_storage:
      var->data.mode = nir_var_shader_storage;
      break;

   case ir_var_system_value:
      var->data.mode = nir_var_system_value;
      break;

   default:
      unreachable("not reached");
   }

   var->data.interpolation = ir->data.interpolation;
   var->data.origin_upper_left = ir->data.origin_upper_left;
   var->data.pixel_center_integer = ir->data.pixel_center_integer;
   var->data.location_frac = ir->data.location_frac;

   switch (ir->data.depth_layout) {
   case ir_depth_layout_none:
      var->data.depth_layout = nir_depth_layout_none;
      break;
   case ir_depth_layout_any:
      var->data.depth_layout = nir_depth_layout_any;
      break;
   case ir_depth_layout_greater:
      var->data.depth_layout = nir_depth_layout_greater;
      break;
   case ir_depth_layout_less:
      var->data.depth_layout = nir_depth_layout_less;
      break;
   case ir_depth_layout_unchanged:
      var->data.depth_layout = nir_depth_layout_unchanged;
      break;
   default:
      unreachable("not reached");
   }

   var->data.index = ir->data.index;
   var->data.descriptor_set = 0;
   var->data.binding = ir->data.binding;
   var->data.offset = ir->data.offset;
   var->data.image.read_only = ir->data.memory_read_only;
   var->data.image.write_only = ir->data.memory_write_only;
   var->data.image.coherent = ir->data.memory_coherent;
   var->data.image._volatile = ir->data.memory_volatile;
   var->data.image.restrict_flag = ir->data.memory_restrict;
   var->data.image.format = ir->data.image_format;
   var->data.fb_fetch_output = ir->data.fb_fetch_output;

   /* Copy built-in state slots (used for state-driven uniforms) */
   var->num_state_slots = ir->get_num_state_slots();
   if (var->num_state_slots > 0) {
      var->state_slots = ralloc_array(var, nir_state_slot,
                                      var->num_state_slots);

      ir_state_slot *state_slots = ir->get_state_slots();
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         for (unsigned j = 0; j < 5; j++)
            var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
         var->state_slots[i].swizzle = state_slots[i].swizzle;
      }
   } else {
      var->state_slots = NULL;
   }

   var->constant_initializer = constant_copy(ir->constant_initializer, var);

   var->interface_type = ir->get_interface_type();

   if (var->data.mode == nir_var_local)
      nir_function_impl_add_variable(impl, var);
   else
      nir_shader_add_variable(shader, var);

   _mesa_hash_table_insert(var_table, ir, var);
   this->var = var;
}

/* Pre-pass hook: register every signature of this function, then skip the
 * function body (the main visitor handles bodies later).
 */
ir_visitor_status
nir_function_visitor::visit_enter(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures) {
      visitor->create_function(sig);
   }
   return visit_continue_with_parent;
}

/* Create the nir_function for a non-intrinsic signature and record the
 * signature -> function mapping.  Only zero-parameter void functions are
 * expected here (GLSL IR is assumed to have inlined everything else —
 * TODO confirm against the callers of this pass).
 */
void
nir_visitor::create_function(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   nir_function *func = nir_function_create(shader, ir->function_name());

   assert(ir->parameters.is_empty());
   assert(ir->return_type == glsl_type::void_type);

   _mesa_hash_table_insert(this->overload_table, ir, func);
}

void
nir_visitor::visit(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures)
      sig->accept(this);
}

/* Translate one function body: look up the nir_function created by the
 * pre-pass, build its impl, point the builder's cursor at the end of the
 * body, and visit the GLSL IR instructions with is_global cleared so that
 * temporaries become function-local variables.  Only "main" is expected to
 * be defined at this point (see the asserts below).
 */
void
nir_visitor::visit(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   struct hash_entry *entry =
      _mesa_hash_table_search(this->overload_table, ir);

   assert(entry);
   nir_function *func = (nir_function *) entry->data;

   if (ir->is_defined) {
      nir_function_impl *impl = nir_function_impl_create(func);
      this->impl = impl;

      assert(strcmp(func->name, "main") == 0);
      assert(ir->parameters.is_empty());
      assert(func->return_type == glsl_type::void_type);

      this->is_global = false;

      nir_builder_init(&b, impl);
      b.cursor = nir_after_cf_list(&impl->body);
      visit_exec_list(&ir->body, this);

      this->is_global = true;
   } else {
      func->impl = NULL;
   }
}

/* Loops map directly onto NIR loop constructs via the builder. */
void
nir_visitor::visit(ir_loop *ir)
{
   nir_push_loop(&b);
   visit_exec_list(&ir->body_instructions, this);
   nir_pop_loop(&b, NULL);
}

/* If/else maps directly onto NIR control flow; the else list may be empty. */
void
nir_visitor::visit(ir_if *ir)
{
   nir_push_if(&b, evaluate_rvalue(ir->condition));
   visit_exec_list(&ir->then_instructions, this);
   nir_push_else(&b, NULL);
   visit_exec_list(&ir->else_instructions, this);
   nir_pop_if(&b, NULL);
}

void
nir_visitor::visit(ir_discard *ir)
{
   /*
    * discards aren't treated as control flow, because before we lower them
    * they can appear anywhere in the shader and the stuff after them may still
    * be executed (yay, crazy GLSL rules!). However, after lowering, all the
    * discards will be immediately followed by a return.
    */

   nir_intrinsic_instr *discard;
   if (ir->condition) {
      discard = nir_intrinsic_instr_create(this->shader,
                                           nir_intrinsic_discard_if);
      discard->src[0] =
         nir_src_for_ssa(evaluate_rvalue(ir->condition));
   } else {
      discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard);
   }

   nir_builder_instr_insert(&b, &discard->instr);
}

/* EmitStreamVertex() -> emit_vertex intrinsic carrying the stream id. */
void
nir_visitor::visit(ir_emit_vertex *ir)
{
   nir_intrinsic_instr *instr =
      nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex);
   nir_intrinsic_set_stream_id(instr, ir->stream_id());
   nir_builder_instr_insert(&b, &instr->instr);
}

/* EndStreamPrimitive() -> end_primitive intrinsic carrying the stream id. */
void
nir_visitor::visit(ir_end_primitive *ir)
{
   nir_intrinsic_instr *instr =
      nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive);
   nir_intrinsic_set_stream_id(instr, ir->stream_id());
   nir_builder_instr_insert(&b, &instr->instr);
}

/* break/continue map onto the corresponding NIR jump instructions. */
void
nir_visitor::visit(ir_loop_jump *ir)
{
   nir_jump_type type;
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      type = nir_jump_break;
      break;
   case ir_loop_jump::jump_continue:
      type = nir_jump_continue;
      break;
   default:
      unreachable("not reached");
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
   nir_builder_instr_insert(&b, &instr->instr);
}

/* A return with a value is lowered to a copy into the impl's return_var
 * followed by a return jump.
 *
 * NOTE(review): the copy_var intrinsic built here is never inserted via the
 * builder.  This branch appears unreachable in practice, since only a void
 * "main" with no parameters is accepted (see the asserts in
 * visit(ir_function_signature)) — confirm before relying on value returns.
 */
void
nir_visitor::visit(ir_return *ir)
{
   if (ir->value != NULL) {
      nir_intrinsic_instr *copy =
         nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);

      copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var);
      copy->variables[1] = evaluate_deref(&copy->instr, ir->value);
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
   nir_builder_instr_insert(&b, &instr->instr);
}

void
nir_visitor::visit(ir_call *ir) 621 { 622 if (ir->callee->is_intrinsic()) { 623 nir_intrinsic_op op; 624 625 switch (ir->callee->intrinsic_id) { 626 case ir_intrinsic_atomic_counter_read: 627 op = nir_intrinsic_atomic_counter_read_var; 628 break; 629 case ir_intrinsic_atomic_counter_increment: 630 op = nir_intrinsic_atomic_counter_inc_var; 631 break; 632 case ir_intrinsic_atomic_counter_predecrement: 633 op = nir_intrinsic_atomic_counter_dec_var; 634 break; 635 case ir_intrinsic_atomic_counter_add: 636 op = nir_intrinsic_atomic_counter_add_var; 637 break; 638 case ir_intrinsic_atomic_counter_and: 639 op = nir_intrinsic_atomic_counter_and_var; 640 break; 641 case ir_intrinsic_atomic_counter_or: 642 op = nir_intrinsic_atomic_counter_or_var; 643 break; 644 case ir_intrinsic_atomic_counter_xor: 645 op = nir_intrinsic_atomic_counter_xor_var; 646 break; 647 case ir_intrinsic_atomic_counter_min: 648 op = nir_intrinsic_atomic_counter_min_var; 649 break; 650 case ir_intrinsic_atomic_counter_max: 651 op = nir_intrinsic_atomic_counter_max_var; 652 break; 653 case ir_intrinsic_atomic_counter_exchange: 654 op = nir_intrinsic_atomic_counter_exchange_var; 655 break; 656 case ir_intrinsic_atomic_counter_comp_swap: 657 op = nir_intrinsic_atomic_counter_comp_swap_var; 658 break; 659 case ir_intrinsic_image_load: 660 op = nir_intrinsic_image_load; 661 break; 662 case ir_intrinsic_image_store: 663 op = nir_intrinsic_image_store; 664 break; 665 case ir_intrinsic_image_atomic_add: 666 op = nir_intrinsic_image_atomic_add; 667 break; 668 case ir_intrinsic_image_atomic_min: 669 op = nir_intrinsic_image_atomic_min; 670 break; 671 case ir_intrinsic_image_atomic_max: 672 op = nir_intrinsic_image_atomic_max; 673 break; 674 case ir_intrinsic_image_atomic_and: 675 op = nir_intrinsic_image_atomic_and; 676 break; 677 case ir_intrinsic_image_atomic_or: 678 op = nir_intrinsic_image_atomic_or; 679 break; 680 case ir_intrinsic_image_atomic_xor: 681 op = nir_intrinsic_image_atomic_xor; 682 break; 683 
case ir_intrinsic_image_atomic_exchange: 684 op = nir_intrinsic_image_atomic_exchange; 685 break; 686 case ir_intrinsic_image_atomic_comp_swap: 687 op = nir_intrinsic_image_atomic_comp_swap; 688 break; 689 case ir_intrinsic_memory_barrier: 690 op = nir_intrinsic_memory_barrier; 691 break; 692 case ir_intrinsic_image_size: 693 op = nir_intrinsic_image_size; 694 break; 695 case ir_intrinsic_image_samples: 696 op = nir_intrinsic_image_samples; 697 break; 698 case ir_intrinsic_ssbo_store: 699 op = nir_intrinsic_store_ssbo; 700 break; 701 case ir_intrinsic_ssbo_load: 702 op = nir_intrinsic_load_ssbo; 703 break; 704 case ir_intrinsic_ssbo_atomic_add: 705 op = nir_intrinsic_ssbo_atomic_add; 706 break; 707 case ir_intrinsic_ssbo_atomic_and: 708 op = nir_intrinsic_ssbo_atomic_and; 709 break; 710 case ir_intrinsic_ssbo_atomic_or: 711 op = nir_intrinsic_ssbo_atomic_or; 712 break; 713 case ir_intrinsic_ssbo_atomic_xor: 714 op = nir_intrinsic_ssbo_atomic_xor; 715 break; 716 case ir_intrinsic_ssbo_atomic_min: 717 assert(ir->return_deref); 718 if (ir->return_deref->type == glsl_type::int_type) 719 op = nir_intrinsic_ssbo_atomic_imin; 720 else if (ir->return_deref->type == glsl_type::uint_type) 721 op = nir_intrinsic_ssbo_atomic_umin; 722 else 723 unreachable("Invalid type"); 724 break; 725 case ir_intrinsic_ssbo_atomic_max: 726 assert(ir->return_deref); 727 if (ir->return_deref->type == glsl_type::int_type) 728 op = nir_intrinsic_ssbo_atomic_imax; 729 else if (ir->return_deref->type == glsl_type::uint_type) 730 op = nir_intrinsic_ssbo_atomic_umax; 731 else 732 unreachable("Invalid type"); 733 break; 734 case ir_intrinsic_ssbo_atomic_exchange: 735 op = nir_intrinsic_ssbo_atomic_exchange; 736 break; 737 case ir_intrinsic_ssbo_atomic_comp_swap: 738 op = nir_intrinsic_ssbo_atomic_comp_swap; 739 break; 740 case ir_intrinsic_shader_clock: 741 op = nir_intrinsic_shader_clock; 742 break; 743 case ir_intrinsic_group_memory_barrier: 744 op = nir_intrinsic_group_memory_barrier; 745 break; 
746 case ir_intrinsic_memory_barrier_atomic_counter: 747 op = nir_intrinsic_memory_barrier_atomic_counter; 748 break; 749 case ir_intrinsic_memory_barrier_buffer: 750 op = nir_intrinsic_memory_barrier_buffer; 751 break; 752 case ir_intrinsic_memory_barrier_image: 753 op = nir_intrinsic_memory_barrier_image; 754 break; 755 case ir_intrinsic_memory_barrier_shared: 756 op = nir_intrinsic_memory_barrier_shared; 757 break; 758 case ir_intrinsic_shared_load: 759 op = nir_intrinsic_load_shared; 760 break; 761 case ir_intrinsic_shared_store: 762 op = nir_intrinsic_store_shared; 763 break; 764 case ir_intrinsic_shared_atomic_add: 765 op = nir_intrinsic_shared_atomic_add; 766 break; 767 case ir_intrinsic_shared_atomic_and: 768 op = nir_intrinsic_shared_atomic_and; 769 break; 770 case ir_intrinsic_shared_atomic_or: 771 op = nir_intrinsic_shared_atomic_or; 772 break; 773 case ir_intrinsic_shared_atomic_xor: 774 op = nir_intrinsic_shared_atomic_xor; 775 break; 776 case ir_intrinsic_shared_atomic_min: 777 assert(ir->return_deref); 778 if (ir->return_deref->type == glsl_type::int_type) 779 op = nir_intrinsic_shared_atomic_imin; 780 else if (ir->return_deref->type == glsl_type::uint_type) 781 op = nir_intrinsic_shared_atomic_umin; 782 else 783 unreachable("Invalid type"); 784 break; 785 case ir_intrinsic_shared_atomic_max: 786 assert(ir->return_deref); 787 if (ir->return_deref->type == glsl_type::int_type) 788 op = nir_intrinsic_shared_atomic_imax; 789 else if (ir->return_deref->type == glsl_type::uint_type) 790 op = nir_intrinsic_shared_atomic_umax; 791 else 792 unreachable("Invalid type"); 793 break; 794 case ir_intrinsic_shared_atomic_exchange: 795 op = nir_intrinsic_shared_atomic_exchange; 796 break; 797 case ir_intrinsic_shared_atomic_comp_swap: 798 op = nir_intrinsic_shared_atomic_comp_swap; 799 break; 800 case ir_intrinsic_vote_any: 801 op = nir_intrinsic_vote_any; 802 break; 803 case ir_intrinsic_vote_all: 804 op = nir_intrinsic_vote_all; 805 break; 806 case 
ir_intrinsic_vote_eq: 807 op = nir_intrinsic_vote_eq; 808 break; 809 case ir_intrinsic_ballot: 810 op = nir_intrinsic_ballot; 811 break; 812 case ir_intrinsic_read_invocation: 813 op = nir_intrinsic_read_invocation; 814 break; 815 case ir_intrinsic_read_first_invocation: 816 op = nir_intrinsic_read_first_invocation; 817 break; 818 default: 819 unreachable("not reached"); 820 } 821 822 nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); 823 nir_dest *dest = &instr->dest; 824 825 switch (op) { 826 case nir_intrinsic_atomic_counter_read_var: 827 case nir_intrinsic_atomic_counter_inc_var: 828 case nir_intrinsic_atomic_counter_dec_var: 829 case nir_intrinsic_atomic_counter_add_var: 830 case nir_intrinsic_atomic_counter_min_var: 831 case nir_intrinsic_atomic_counter_max_var: 832 case nir_intrinsic_atomic_counter_and_var: 833 case nir_intrinsic_atomic_counter_or_var: 834 case nir_intrinsic_atomic_counter_xor_var: 835 case nir_intrinsic_atomic_counter_exchange_var: 836 case nir_intrinsic_atomic_counter_comp_swap_var: { 837 /* Set the counter variable dereference. */ 838 exec_node *param = ir->actual_parameters.get_head(); 839 ir_dereference *counter = (ir_dereference *)param; 840 841 instr->variables[0] = evaluate_deref(&instr->instr, counter); 842 param = param->get_next(); 843 844 /* Set the intrinsic destination. */ 845 if (ir->return_deref) { 846 nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); 847 } 848 849 /* Set the intrinsic parameters. 
*/ 850 if (!param->is_tail_sentinel()) { 851 instr->src[0] = 852 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 853 param = param->get_next(); 854 } 855 856 if (!param->is_tail_sentinel()) { 857 instr->src[1] = 858 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 859 param = param->get_next(); 860 } 861 862 nir_builder_instr_insert(&b, &instr->instr); 863 break; 864 } 865 case nir_intrinsic_image_load: 866 case nir_intrinsic_image_store: 867 case nir_intrinsic_image_atomic_add: 868 case nir_intrinsic_image_atomic_min: 869 case nir_intrinsic_image_atomic_max: 870 case nir_intrinsic_image_atomic_and: 871 case nir_intrinsic_image_atomic_or: 872 case nir_intrinsic_image_atomic_xor: 873 case nir_intrinsic_image_atomic_exchange: 874 case nir_intrinsic_image_atomic_comp_swap: 875 case nir_intrinsic_image_samples: 876 case nir_intrinsic_image_size: { 877 nir_ssa_undef_instr *instr_undef = 878 nir_ssa_undef_instr_create(shader, 1, 32); 879 nir_builder_instr_insert(&b, &instr_undef->instr); 880 881 /* Set the image variable dereference. */ 882 exec_node *param = ir->actual_parameters.get_head(); 883 ir_dereference *image = (ir_dereference *)param; 884 const glsl_type *type = 885 image->variable_referenced()->type->without_array(); 886 887 instr->variables[0] = evaluate_deref(&instr->instr, image); 888 param = param->get_next(); 889 890 /* Set the intrinsic destination. */ 891 if (ir->return_deref) { 892 unsigned num_components = ir->return_deref->type->vector_elements; 893 if (instr->intrinsic == nir_intrinsic_image_size) 894 instr->num_components = num_components; 895 nir_ssa_dest_init(&instr->instr, &instr->dest, 896 num_components, 32, NULL); 897 } 898 899 if (op == nir_intrinsic_image_size || 900 op == nir_intrinsic_image_samples) { 901 nir_builder_instr_insert(&b, &instr->instr); 902 break; 903 } 904 905 /* Set the address argument, extending the coordinate vector to four 906 * components. 
907 */ 908 nir_ssa_def *src_addr = 909 evaluate_rvalue((ir_dereference *)param); 910 nir_ssa_def *srcs[4]; 911 912 for (int i = 0; i < 4; i++) { 913 if (i < type->coordinate_components()) 914 srcs[i] = nir_channel(&b, src_addr, i); 915 else 916 srcs[i] = &instr_undef->def; 917 } 918 919 instr->src[0] = nir_src_for_ssa(nir_vec(&b, srcs, 4)); 920 param = param->get_next(); 921 922 /* Set the sample argument, which is undefined for single-sample 923 * images. 924 */ 925 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { 926 instr->src[1] = 927 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 928 param = param->get_next(); 929 } else { 930 instr->src[1] = nir_src_for_ssa(&instr_undef->def); 931 } 932 933 /* Set the intrinsic parameters. */ 934 if (!param->is_tail_sentinel()) { 935 instr->src[2] = 936 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 937 param = param->get_next(); 938 } 939 940 if (!param->is_tail_sentinel()) { 941 instr->src[3] = 942 nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); 943 param = param->get_next(); 944 } 945 nir_builder_instr_insert(&b, &instr->instr); 946 break; 947 } 948 case nir_intrinsic_memory_barrier: 949 case nir_intrinsic_group_memory_barrier: 950 case nir_intrinsic_memory_barrier_atomic_counter: 951 case nir_intrinsic_memory_barrier_buffer: 952 case nir_intrinsic_memory_barrier_image: 953 case nir_intrinsic_memory_barrier_shared: 954 nir_builder_instr_insert(&b, &instr->instr); 955 break; 956 case nir_intrinsic_shader_clock: 957 nir_ssa_dest_init(&instr->instr, &instr->dest, 2, 32, NULL); 958 instr->num_components = 2; 959 nir_builder_instr_insert(&b, &instr->instr); 960 break; 961 case nir_intrinsic_store_ssbo: { 962 exec_node *param = ir->actual_parameters.get_head(); 963 ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); 964 965 param = param->get_next(); 966 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 967 968 param = param->get_next(); 969 ir_rvalue *val = 
((ir_instruction *)param)->as_rvalue(); 970 971 param = param->get_next(); 972 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 973 assert(write_mask); 974 975 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); 976 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); 977 instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); 978 nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); 979 instr->num_components = val->type->vector_elements; 980 981 nir_builder_instr_insert(&b, &instr->instr); 982 break; 983 } 984 case nir_intrinsic_load_ssbo: { 985 exec_node *param = ir->actual_parameters.get_head(); 986 ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); 987 988 param = param->get_next(); 989 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 990 991 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); 992 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); 993 994 const glsl_type *type = ir->return_deref->var->type; 995 instr->num_components = type->vector_elements; 996 997 /* Setup destination register */ 998 unsigned bit_size = glsl_get_bit_size(type); 999 nir_ssa_dest_init(&instr->instr, &instr->dest, 1000 type->vector_elements, bit_size, NULL); 1001 1002 /* Insert the created nir instruction now since in the case of boolean 1003 * result we will need to emit another instruction after it 1004 */ 1005 nir_builder_instr_insert(&b, &instr->instr); 1006 1007 /* 1008 * In SSBO/UBO's, a true boolean value is any non-zero value, but we 1009 * consider a true boolean to be ~0. Fix this up with a != 0 1010 * comparison. 
1011 */ 1012 if (type->is_boolean()) { 1013 nir_alu_instr *load_ssbo_compare = 1014 nir_alu_instr_create(shader, nir_op_ine); 1015 load_ssbo_compare->src[0].src.is_ssa = true; 1016 load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa; 1017 load_ssbo_compare->src[1].src = 1018 nir_src_for_ssa(nir_imm_int(&b, 0)); 1019 for (unsigned i = 0; i < type->vector_elements; i++) 1020 load_ssbo_compare->src[1].swizzle[i] = 0; 1021 nir_ssa_dest_init(&load_ssbo_compare->instr, 1022 &load_ssbo_compare->dest.dest, 1023 type->vector_elements, bit_size, NULL); 1024 load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1; 1025 nir_builder_instr_insert(&b, &load_ssbo_compare->instr); 1026 dest = &load_ssbo_compare->dest.dest; 1027 } 1028 break; 1029 } 1030 case nir_intrinsic_ssbo_atomic_add: 1031 case nir_intrinsic_ssbo_atomic_imin: 1032 case nir_intrinsic_ssbo_atomic_umin: 1033 case nir_intrinsic_ssbo_atomic_imax: 1034 case nir_intrinsic_ssbo_atomic_umax: 1035 case nir_intrinsic_ssbo_atomic_and: 1036 case nir_intrinsic_ssbo_atomic_or: 1037 case nir_intrinsic_ssbo_atomic_xor: 1038 case nir_intrinsic_ssbo_atomic_exchange: 1039 case nir_intrinsic_ssbo_atomic_comp_swap: { 1040 int param_count = ir->actual_parameters.length(); 1041 assert(param_count == 3 || param_count == 4); 1042 1043 /* Block index */ 1044 exec_node *param = ir->actual_parameters.get_head(); 1045 ir_instruction *inst = (ir_instruction *) param; 1046 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1047 1048 /* Offset */ 1049 param = param->get_next(); 1050 inst = (ir_instruction *) param; 1051 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1052 1053 /* data1 parameter (this is always present) */ 1054 param = param->get_next(); 1055 inst = (ir_instruction *) param; 1056 instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1057 1058 /* data2 parameter (only with atomic_comp_swap) */ 1059 if (param_count == 4) { 1060 assert(op == 
nir_intrinsic_ssbo_atomic_comp_swap); 1061 param = param->get_next(); 1062 inst = (ir_instruction *) param; 1063 instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1064 } 1065 1066 /* Atomic result */ 1067 assert(ir->return_deref); 1068 nir_ssa_dest_init(&instr->instr, &instr->dest, 1069 ir->return_deref->type->vector_elements, 32, NULL); 1070 nir_builder_instr_insert(&b, &instr->instr); 1071 break; 1072 } 1073 case nir_intrinsic_load_shared: { 1074 exec_node *param = ir->actual_parameters.get_head(); 1075 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 1076 1077 nir_intrinsic_set_base(instr, 0); 1078 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); 1079 1080 const glsl_type *type = ir->return_deref->var->type; 1081 instr->num_components = type->vector_elements; 1082 1083 /* Setup destination register */ 1084 unsigned bit_size = glsl_get_bit_size(type); 1085 nir_ssa_dest_init(&instr->instr, &instr->dest, 1086 type->vector_elements, bit_size, NULL); 1087 1088 nir_builder_instr_insert(&b, &instr->instr); 1089 break; 1090 } 1091 case nir_intrinsic_store_shared: { 1092 exec_node *param = ir->actual_parameters.get_head(); 1093 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 1094 1095 param = param->get_next(); 1096 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 1097 1098 param = param->get_next(); 1099 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 1100 assert(write_mask); 1101 1102 nir_intrinsic_set_base(instr, 0); 1103 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); 1104 1105 nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]); 1106 1107 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); 1108 instr->num_components = val->type->vector_elements; 1109 1110 nir_builder_instr_insert(&b, &instr->instr); 1111 break; 1112 } 1113 case nir_intrinsic_shared_atomic_add: 1114 case nir_intrinsic_shared_atomic_imin: 1115 case nir_intrinsic_shared_atomic_umin: 1116 case 
nir_intrinsic_shared_atomic_imax: 1117 case nir_intrinsic_shared_atomic_umax: 1118 case nir_intrinsic_shared_atomic_and: 1119 case nir_intrinsic_shared_atomic_or: 1120 case nir_intrinsic_shared_atomic_xor: 1121 case nir_intrinsic_shared_atomic_exchange: 1122 case nir_intrinsic_shared_atomic_comp_swap: { 1123 int param_count = ir->actual_parameters.length(); 1124 assert(param_count == 2 || param_count == 3); 1125 1126 /* Offset */ 1127 exec_node *param = ir->actual_parameters.get_head(); 1128 ir_instruction *inst = (ir_instruction *) param; 1129 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1130 1131 /* data1 parameter (this is always present) */ 1132 param = param->get_next(); 1133 inst = (ir_instruction *) param; 1134 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1135 1136 /* data2 parameter (only with atomic_comp_swap) */ 1137 if (param_count == 3) { 1138 assert(op == nir_intrinsic_shared_atomic_comp_swap); 1139 param = param->get_next(); 1140 inst = (ir_instruction *) param; 1141 instr->src[2] = 1142 nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); 1143 } 1144 1145 /* Atomic result */ 1146 assert(ir->return_deref); 1147 unsigned bit_size = glsl_get_bit_size(ir->return_deref->type); 1148 nir_ssa_dest_init(&instr->instr, &instr->dest, 1149 ir->return_deref->type->vector_elements, 1150 bit_size, NULL); 1151 nir_builder_instr_insert(&b, &instr->instr); 1152 break; 1153 } 1154 case nir_intrinsic_vote_any: 1155 case nir_intrinsic_vote_all: 1156 case nir_intrinsic_vote_eq: { 1157 nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL); 1158 1159 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1160 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1161 1162 nir_builder_instr_insert(&b, &instr->instr); 1163 break; 1164 } 1165 1166 case nir_intrinsic_ballot: { 1167 nir_ssa_dest_init(&instr->instr, &instr->dest, 1168 ir->return_deref->type->vector_elements, 64, NULL); 1169 instr->num_components 
= ir->return_deref->type->vector_elements; 1170 1171 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1172 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1173 1174 nir_builder_instr_insert(&b, &instr->instr); 1175 break; 1176 } 1177 case nir_intrinsic_read_invocation: { 1178 nir_ssa_dest_init(&instr->instr, &instr->dest, 1179 ir->return_deref->type->vector_elements, 32, NULL); 1180 instr->num_components = ir->return_deref->type->vector_elements; 1181 1182 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1183 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1184 1185 ir_rvalue *invocation = (ir_rvalue *) ir->actual_parameters.get_head()->next; 1186 instr->src[1] = nir_src_for_ssa(evaluate_rvalue(invocation)); 1187 1188 nir_builder_instr_insert(&b, &instr->instr); 1189 break; 1190 } 1191 case nir_intrinsic_read_first_invocation: { 1192 nir_ssa_dest_init(&instr->instr, &instr->dest, 1193 ir->return_deref->type->vector_elements, 32, NULL); 1194 instr->num_components = ir->return_deref->type->vector_elements; 1195 1196 ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head(); 1197 instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value)); 1198 1199 nir_builder_instr_insert(&b, &instr->instr); 1200 break; 1201 } 1202 default: 1203 unreachable("not reached"); 1204 } 1205 1206 if (ir->return_deref) { 1207 nir_intrinsic_instr *store_instr = 1208 nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); 1209 store_instr->num_components = ir->return_deref->type->vector_elements; 1210 nir_intrinsic_set_write_mask(store_instr, 1211 (1 << store_instr->num_components) - 1); 1212 1213 store_instr->variables[0] = 1214 evaluate_deref(&store_instr->instr, ir->return_deref); 1215 store_instr->src[0] = nir_src_for_ssa(&dest->ssa); 1216 1217 nir_builder_instr_insert(&b, &store_instr->instr); 1218 } 1219 1220 return; 1221 } 1222 1223 struct hash_entry *entry = 1224 _mesa_hash_table_search(this->overload_table, ir->callee); 
   assert(entry);
   nir_function *callee = (nir_function *) entry->data;

   nir_call_instr *instr = nir_call_instr_create(this->shader, callee);

   /* Evaluate each actual parameter into a deref owned by the call. */
   unsigned i = 0;
   foreach_in_list(ir_dereference, param, &ir->actual_parameters) {
      instr->params[i] = evaluate_deref(&instr->instr, param);
      i++;
   }

   instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref);
   nir_builder_instr_insert(&b, &instr->instr);
}

/* Lower a GLSL IR assignment to NIR.  Whole-variable copies become a
 * copy_var intrinsic; everything else becomes a (possibly write-masked)
 * store_var.  A conditional assignment is wrapped in an if/endif block.
 */
void
nir_visitor::visit(ir_assignment *ir)
{
   unsigned num_components = ir->lhs->type->vector_elements;

   /* Propagate invariant/precise onto every ALU op built for this
    * assignment via the builder's "exact" flag.
    */
   b.exact = ir->lhs->variable_referenced()->data.invariant ||
             ir->lhs->variable_referenced()->data.precise;

   if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
       (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
      /* We're doing a plain-as-can-be copy, so emit a copy_var */
      nir_intrinsic_instr *copy =
         nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var);

      copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs);
      copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs);

      if (ir->condition) {
         nir_push_if(&b, evaluate_rvalue(ir->condition));
         nir_builder_instr_insert(&b, &copy->instr);
         nir_pop_if(&b, NULL);
      } else {
         nir_builder_instr_insert(&b, &copy->instr);
      }
      return;
   }

   assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector());

   /* Evaluate the LHS deref first (fills this->deref_head), then the RHS
    * value; the deref must be captured before evaluate_rvalue clobbers it.
    */
   ir->lhs->accept(this);
   nir_deref_var *lhs_deref = this->deref_head;
   nir_ssa_def *src = evaluate_rvalue(ir->rhs);

   if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {
      /* GLSL IR will give us the input to the write-masked assignment in a
       * single packed vector.  So, for example, if the writemask is xzw, then
       * we have to swizzle x -> x, y -> z, and z -> w and get the y component
       * from the load.
       */
      unsigned swiz[4];
      unsigned component = 0;
      for (unsigned i = 0; i < 4; i++) {
         swiz[i] = ir->write_mask & (1 << i) ? component++ : 0;
      }
      src = nir_swizzle(&b, src, swiz, num_components, !supports_ints);
   }

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var);
   store->num_components = ir->lhs->type->vector_elements;
   nir_intrinsic_set_write_mask(store, ir->write_mask);
   store->variables[0] = nir_deref_var_clone(lhs_deref, store);
   store->src[0] = nir_src_for_ssa(src);

   if (ir->condition) {
      nir_push_if(&b, evaluate_rvalue(ir->condition));
      nir_builder_instr_insert(&b, &store->instr);
      nir_pop_if(&b, NULL);
   } else {
      nir_builder_instr_insert(&b, &store->instr);
   }
}

/*
 * Given an instruction, returns a pointer to its destination or NULL if there
 * is no destination.
 *
 * Note that this only handles instructions we generate at this level.
 */
static nir_dest *
get_instr_dest(nir_instr *instr)
{
   nir_alu_instr *alu_instr;
   nir_intrinsic_instr *intrinsic_instr;
   nir_tex_instr *tex_instr;

   switch (instr->type) {
   case nir_instr_type_alu:
      alu_instr = nir_instr_as_alu(instr);
      return &alu_instr->dest.dest;

   case nir_instr_type_intrinsic:
      intrinsic_instr = nir_instr_as_intrinsic(instr);
      /* Some intrinsics (e.g. barriers, stores) have no destination. */
      if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest)
         return &intrinsic_instr->dest;
      else
         return NULL;

   case nir_instr_type_tex:
      tex_instr = nir_instr_as_tex(instr);
      return &tex_instr->dest;

   default:
      unreachable("not reached");
   }

   /* Unreachable; kept to satisfy compilers that warn about control flow
    * reaching the end of a non-void function.
    */
   return NULL;
}

/* Initialize the instruction's SSA destination (if it has one), insert it
 * at the builder's cursor, and record the SSA def in this->result so the
 * enclosing expression visitor can pick it up.
 */
void
nir_visitor::add_instr(nir_instr *instr, unsigned num_components,
                       unsigned bit_size)
{
   nir_dest *dest = get_instr_dest(instr);

   if (dest)
      nir_ssa_dest_init(instr, dest, num_components, bit_size, NULL);

   nir_builder_instr_insert(&b, instr);

   if (dest) {
      assert(dest->is_ssa);
      this->result = &dest->ssa;
   }
}

/* Visit an rvalue and return its value as an SSA def.  Plain dereferences
 * and constants only build a deref chain, so they need an explicit
 * load_var emitted here; other rvalues already set this->result.
 */
nir_ssa_def *
nir_visitor::evaluate_rvalue(ir_rvalue* ir)
{
   ir->accept(this);
   if (ir->as_dereference() || ir->as_constant()) {
      /*
       * A dereference is being used on the right hand side, which means we
       * must emit a variable load.
1364 */ 1365 1366 nir_intrinsic_instr *load_instr = 1367 nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var); 1368 load_instr->num_components = ir->type->vector_elements; 1369 load_instr->variables[0] = this->deref_head; 1370 ralloc_steal(load_instr, load_instr->variables[0]); 1371 unsigned bit_size = glsl_get_bit_size(ir->type); 1372 add_instr(&load_instr->instr, ir->type->vector_elements, bit_size); 1373 } 1374 1375 return this->result; 1376 } 1377 1378 static bool 1379 type_is_float(glsl_base_type type) 1380 { 1381 return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE || 1382 type == GLSL_TYPE_FLOAT16; 1383 } 1384 1385 static bool 1386 type_is_signed(glsl_base_type type) 1387 { 1388 return type == GLSL_TYPE_INT || type == GLSL_TYPE_INT64 || 1389 type == GLSL_TYPE_INT16; 1390 } 1391 1392 void 1393 nir_visitor::visit(ir_expression *ir) 1394 { 1395 /* Some special cases */ 1396 switch (ir->operation) { 1397 case ir_binop_ubo_load: { 1398 nir_intrinsic_instr *load = 1399 nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); 1400 unsigned bit_size = glsl_get_bit_size(ir->type); 1401 load->num_components = ir->type->vector_elements; 1402 load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); 1403 load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); 1404 add_instr(&load->instr, ir->type->vector_elements, bit_size); 1405 1406 /* 1407 * In UBO's, a true boolean value is any non-zero value, but we consider 1408 * a true boolean to be ~0. Fix this up with a != 0 comparison. 
1409 */ 1410 1411 if (ir->type->is_boolean()) 1412 this->result = nir_ine(&b, &load->dest.ssa, nir_imm_int(&b, 0)); 1413 1414 return; 1415 } 1416 1417 case ir_unop_interpolate_at_centroid: 1418 case ir_binop_interpolate_at_offset: 1419 case ir_binop_interpolate_at_sample: { 1420 ir_dereference *deref = ir->operands[0]->as_dereference(); 1421 ir_swizzle *swizzle = NULL; 1422 if (!deref) { 1423 /* the api does not allow a swizzle here, but the varying packing code 1424 * may have pushed one into here. 1425 */ 1426 swizzle = ir->operands[0]->as_swizzle(); 1427 assert(swizzle); 1428 deref = swizzle->val->as_dereference(); 1429 assert(deref); 1430 } 1431 1432 deref->accept(this); 1433 1434 nir_intrinsic_op op; 1435 if (this->deref_head->var->data.mode == nir_var_shader_in) { 1436 switch (ir->operation) { 1437 case ir_unop_interpolate_at_centroid: 1438 op = nir_intrinsic_interp_var_at_centroid; 1439 break; 1440 case ir_binop_interpolate_at_offset: 1441 op = nir_intrinsic_interp_var_at_offset; 1442 break; 1443 case ir_binop_interpolate_at_sample: 1444 op = nir_intrinsic_interp_var_at_sample; 1445 break; 1446 default: 1447 unreachable("Invalid interpolation intrinsic"); 1448 } 1449 } else { 1450 /* This case can happen if the vertex shader does not write the 1451 * given varying. In this case, the linker will lower it to a 1452 * global variable. Since interpolating a variable makes no 1453 * sense, we'll just turn it into a load which will probably 1454 * eventually end up as an SSA definition. 
1455 */ 1456 assert(this->deref_head->var->data.mode == nir_var_global); 1457 op = nir_intrinsic_load_var; 1458 } 1459 1460 nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); 1461 intrin->num_components = deref->type->vector_elements; 1462 intrin->variables[0] = this->deref_head; 1463 ralloc_steal(intrin, intrin->variables[0]); 1464 1465 if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset || 1466 intrin->intrinsic == nir_intrinsic_interp_var_at_sample) 1467 intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); 1468 1469 unsigned bit_size = glsl_get_bit_size(deref->type); 1470 add_instr(&intrin->instr, deref->type->vector_elements, bit_size); 1471 1472 if (swizzle) { 1473 unsigned swiz[4] = { 1474 swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w 1475 }; 1476 1477 result = nir_swizzle(&b, result, swiz, 1478 swizzle->type->vector_elements, false); 1479 } 1480 1481 return; 1482 } 1483 1484 default: 1485 break; 1486 } 1487 1488 nir_ssa_def *srcs[4]; 1489 for (unsigned i = 0; i < ir->num_operands; i++) 1490 srcs[i] = evaluate_rvalue(ir->operands[i]); 1491 1492 glsl_base_type types[4]; 1493 for (unsigned i = 0; i < ir->num_operands; i++) 1494 if (supports_ints) 1495 types[i] = ir->operands[i]->type->base_type; 1496 else 1497 types[i] = GLSL_TYPE_FLOAT; 1498 1499 glsl_base_type out_type; 1500 if (supports_ints) 1501 out_type = ir->type->base_type; 1502 else 1503 out_type = GLSL_TYPE_FLOAT; 1504 1505 switch (ir->operation) { 1506 case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break; 1507 case ir_unop_logic_not: 1508 result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]); 1509 break; 1510 case ir_unop_neg: 1511 result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0]) 1512 : nir_ineg(&b, srcs[0]); 1513 break; 1514 case ir_unop_abs: 1515 result = type_is_float(types[0]) ? 
nir_fabs(&b, srcs[0]) 1516 : nir_iabs(&b, srcs[0]); 1517 break; 1518 case ir_unop_saturate: 1519 assert(type_is_float(types[0])); 1520 result = nir_fsat(&b, srcs[0]); 1521 break; 1522 case ir_unop_sign: 1523 result = type_is_float(types[0]) ? nir_fsign(&b, srcs[0]) 1524 : nir_isign(&b, srcs[0]); 1525 break; 1526 case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break; 1527 case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break; 1528 case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break; 1529 case ir_unop_exp: unreachable("ir_unop_exp should have been lowered"); 1530 case ir_unop_log: unreachable("ir_unop_log should have been lowered"); 1531 case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break; 1532 case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break; 1533 case ir_unop_i2f: 1534 result = supports_ints ? nir_i2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]); 1535 break; 1536 case ir_unop_u2f: 1537 result = supports_ints ? nir_u2f32(&b, srcs[0]) : nir_fmov(&b, srcs[0]); 1538 break; 1539 case ir_unop_b2f: 1540 result = supports_ints ? 
nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); 1541 break; 1542 case ir_unop_f2i: 1543 case ir_unop_f2u: 1544 case ir_unop_f2b: 1545 case ir_unop_i2b: 1546 case ir_unop_b2i: 1547 case ir_unop_b2i64: 1548 case ir_unop_d2f: 1549 case ir_unop_f2d: 1550 case ir_unop_d2i: 1551 case ir_unop_d2u: 1552 case ir_unop_d2b: 1553 case ir_unop_i2d: 1554 case ir_unop_u2d: 1555 case ir_unop_i642i: 1556 case ir_unop_i642u: 1557 case ir_unop_i642f: 1558 case ir_unop_i642b: 1559 case ir_unop_i642d: 1560 case ir_unop_u642i: 1561 case ir_unop_u642u: 1562 case ir_unop_u642f: 1563 case ir_unop_u642d: 1564 case ir_unop_i2i64: 1565 case ir_unop_u2i64: 1566 case ir_unop_f2i64: 1567 case ir_unop_d2i64: 1568 case ir_unop_i2u64: 1569 case ir_unop_u2u64: 1570 case ir_unop_f2u64: 1571 case ir_unop_d2u64: 1572 case ir_unop_i2u: 1573 case ir_unop_u2i: 1574 case ir_unop_i642u64: 1575 case ir_unop_u642i64: { 1576 nir_alu_type src_type = nir_get_nir_type_for_glsl_base_type(types[0]); 1577 nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type); 1578 result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type, 1579 nir_rounding_mode_undef), 1580 srcs[0], NULL, NULL, NULL); 1581 /* b2i and b2f don't have fixed bit-size versions so the builder will 1582 * just assume 32 and we have to fix it up here. 
1583 */ 1584 result->bit_size = nir_alu_type_get_type_size(dst_type); 1585 break; 1586 } 1587 1588 case ir_unop_bitcast_i2f: 1589 case ir_unop_bitcast_f2i: 1590 case ir_unop_bitcast_u2f: 1591 case ir_unop_bitcast_f2u: 1592 case ir_unop_bitcast_i642d: 1593 case ir_unop_bitcast_d2i64: 1594 case ir_unop_bitcast_u642d: 1595 case ir_unop_bitcast_d2u64: 1596 case ir_unop_subroutine_to_int: 1597 /* no-op */ 1598 result = nir_imov(&b, srcs[0]); 1599 break; 1600 case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break; 1601 case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break; 1602 case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break; 1603 case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break; 1604 case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break; 1605 case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break; 1606 case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break; 1607 case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break; 1608 case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break; 1609 case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break; 1610 case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break; 1611 case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break; 1612 case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break; 1613 case ir_unop_pack_snorm_2x16: 1614 result = nir_pack_snorm_2x16(&b, srcs[0]); 1615 break; 1616 case ir_unop_pack_snorm_4x8: 1617 result = nir_pack_snorm_4x8(&b, srcs[0]); 1618 break; 1619 case ir_unop_pack_unorm_2x16: 1620 result = nir_pack_unorm_2x16(&b, srcs[0]); 1621 break; 1622 case ir_unop_pack_unorm_4x8: 1623 result = nir_pack_unorm_4x8(&b, srcs[0]); 1624 break; 1625 case ir_unop_pack_half_2x16: 1626 result = nir_pack_half_2x16(&b, srcs[0]); 1627 break; 1628 case ir_unop_unpack_snorm_2x16: 1629 result = nir_unpack_snorm_2x16(&b, srcs[0]); 1630 break; 1631 case ir_unop_unpack_snorm_4x8: 1632 result = nir_unpack_snorm_4x8(&b, srcs[0]); 1633 
break; 1634 case ir_unop_unpack_unorm_2x16: 1635 result = nir_unpack_unorm_2x16(&b, srcs[0]); 1636 break; 1637 case ir_unop_unpack_unorm_4x8: 1638 result = nir_unpack_unorm_4x8(&b, srcs[0]); 1639 break; 1640 case ir_unop_unpack_half_2x16: 1641 result = nir_unpack_half_2x16(&b, srcs[0]); 1642 break; 1643 case ir_unop_pack_sampler_2x32: 1644 case ir_unop_pack_image_2x32: 1645 case ir_unop_pack_double_2x32: 1646 case ir_unop_pack_int_2x32: 1647 case ir_unop_pack_uint_2x32: 1648 result = nir_pack_64_2x32(&b, srcs[0]); 1649 break; 1650 case ir_unop_unpack_sampler_2x32: 1651 case ir_unop_unpack_image_2x32: 1652 case ir_unop_unpack_double_2x32: 1653 case ir_unop_unpack_int_2x32: 1654 case ir_unop_unpack_uint_2x32: 1655 result = nir_unpack_64_2x32(&b, srcs[0]); 1656 break; 1657 case ir_unop_bitfield_reverse: 1658 result = nir_bitfield_reverse(&b, srcs[0]); 1659 break; 1660 case ir_unop_bit_count: 1661 result = nir_bit_count(&b, srcs[0]); 1662 break; 1663 case ir_unop_find_msb: 1664 switch (types[0]) { 1665 case GLSL_TYPE_UINT: 1666 result = nir_ufind_msb(&b, srcs[0]); 1667 break; 1668 case GLSL_TYPE_INT: 1669 result = nir_ifind_msb(&b, srcs[0]); 1670 break; 1671 default: 1672 unreachable("Invalid type for findMSB()"); 1673 } 1674 break; 1675 case ir_unop_find_lsb: 1676 result = nir_find_lsb(&b, srcs[0]); 1677 break; 1678 1679 case ir_unop_noise: 1680 switch (ir->type->vector_elements) { 1681 case 1: 1682 switch (ir->operands[0]->type->vector_elements) { 1683 case 1: result = nir_fnoise1_1(&b, srcs[0]); break; 1684 case 2: result = nir_fnoise1_2(&b, srcs[0]); break; 1685 case 3: result = nir_fnoise1_3(&b, srcs[0]); break; 1686 case 4: result = nir_fnoise1_4(&b, srcs[0]); break; 1687 default: unreachable("not reached"); 1688 } 1689 break; 1690 case 2: 1691 switch (ir->operands[0]->type->vector_elements) { 1692 case 1: result = nir_fnoise2_1(&b, srcs[0]); break; 1693 case 2: result = nir_fnoise2_2(&b, srcs[0]); break; 1694 case 3: result = nir_fnoise2_3(&b, srcs[0]); break; 
1695 case 4: result = nir_fnoise2_4(&b, srcs[0]); break; 1696 default: unreachable("not reached"); 1697 } 1698 break; 1699 case 3: 1700 switch (ir->operands[0]->type->vector_elements) { 1701 case 1: result = nir_fnoise3_1(&b, srcs[0]); break; 1702 case 2: result = nir_fnoise3_2(&b, srcs[0]); break; 1703 case 3: result = nir_fnoise3_3(&b, srcs[0]); break; 1704 case 4: result = nir_fnoise3_4(&b, srcs[0]); break; 1705 default: unreachable("not reached"); 1706 } 1707 break; 1708 case 4: 1709 switch (ir->operands[0]->type->vector_elements) { 1710 case 1: result = nir_fnoise4_1(&b, srcs[0]); break; 1711 case 2: result = nir_fnoise4_2(&b, srcs[0]); break; 1712 case 3: result = nir_fnoise4_3(&b, srcs[0]); break; 1713 case 4: result = nir_fnoise4_4(&b, srcs[0]); break; 1714 default: unreachable("not reached"); 1715 } 1716 break; 1717 default: 1718 unreachable("not reached"); 1719 } 1720 break; 1721 case ir_unop_get_buffer_size: { 1722 nir_intrinsic_instr *load = nir_intrinsic_instr_create( 1723 this->shader, 1724 nir_intrinsic_get_buffer_size); 1725 load->num_components = ir->type->vector_elements; 1726 load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); 1727 unsigned bit_size = glsl_get_bit_size(ir->type); 1728 add_instr(&load->instr, ir->type->vector_elements, bit_size); 1729 return; 1730 } 1731 1732 case ir_binop_add: 1733 result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1]) 1734 : nir_iadd(&b, srcs[0], srcs[1]); 1735 break; 1736 case ir_binop_sub: 1737 result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1]) 1738 : nir_isub(&b, srcs[0], srcs[1]); 1739 break; 1740 case ir_binop_mul: 1741 result = type_is_float(out_type) ? 
nir_fmul(&b, srcs[0], srcs[1]) 1742 : nir_imul(&b, srcs[0], srcs[1]); 1743 break; 1744 case ir_binop_div: 1745 if (type_is_float(out_type)) 1746 result = nir_fdiv(&b, srcs[0], srcs[1]); 1747 else if (type_is_signed(out_type)) 1748 result = nir_idiv(&b, srcs[0], srcs[1]); 1749 else 1750 result = nir_udiv(&b, srcs[0], srcs[1]); 1751 break; 1752 case ir_binop_mod: 1753 result = type_is_float(out_type) ? nir_fmod(&b, srcs[0], srcs[1]) 1754 : nir_umod(&b, srcs[0], srcs[1]); 1755 break; 1756 case ir_binop_min: 1757 if (type_is_float(out_type)) 1758 result = nir_fmin(&b, srcs[0], srcs[1]); 1759 else if (type_is_signed(out_type)) 1760 result = nir_imin(&b, srcs[0], srcs[1]); 1761 else 1762 result = nir_umin(&b, srcs[0], srcs[1]); 1763 break; 1764 case ir_binop_max: 1765 if (type_is_float(out_type)) 1766 result = nir_fmax(&b, srcs[0], srcs[1]); 1767 else if (type_is_signed(out_type)) 1768 result = nir_imax(&b, srcs[0], srcs[1]); 1769 else 1770 result = nir_umax(&b, srcs[0], srcs[1]); 1771 break; 1772 case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break; 1773 case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break; 1774 case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break; 1775 case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; 1776 case ir_binop_logic_and: 1777 result = supports_ints ? nir_iand(&b, srcs[0], srcs[1]) 1778 : nir_fand(&b, srcs[0], srcs[1]); 1779 break; 1780 case ir_binop_logic_or: 1781 result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) 1782 : nir_for(&b, srcs[0], srcs[1]); 1783 break; 1784 case ir_binop_logic_xor: 1785 result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1]) 1786 : nir_fxor(&b, srcs[0], srcs[1]); 1787 break; 1788 case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break; 1789 case ir_binop_rshift: 1790 result = (type_is_signed(out_type)) ? 
nir_ishr(&b, srcs[0], srcs[1]) 1791 : nir_ushr(&b, srcs[0], srcs[1]); 1792 break; 1793 case ir_binop_imul_high: 1794 result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1]) 1795 : nir_umul_high(&b, srcs[0], srcs[1]); 1796 break; 1797 case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break; 1798 case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break; 1799 case ir_binop_less: 1800 if (supports_ints) { 1801 if (type_is_float(types[0])) 1802 result = nir_flt(&b, srcs[0], srcs[1]); 1803 else if (type_is_signed(types[0])) 1804 result = nir_ilt(&b, srcs[0], srcs[1]); 1805 else 1806 result = nir_ult(&b, srcs[0], srcs[1]); 1807 } else { 1808 result = nir_slt(&b, srcs[0], srcs[1]); 1809 } 1810 break; 1811 case ir_binop_gequal: 1812 if (supports_ints) { 1813 if (type_is_float(types[0])) 1814 result = nir_fge(&b, srcs[0], srcs[1]); 1815 else if (type_is_signed(types[0])) 1816 result = nir_ige(&b, srcs[0], srcs[1]); 1817 else 1818 result = nir_uge(&b, srcs[0], srcs[1]); 1819 } else { 1820 result = nir_slt(&b, srcs[0], srcs[1]); 1821 } 1822 break; 1823 case ir_binop_equal: 1824 if (supports_ints) { 1825 if (type_is_float(types[0])) 1826 result = nir_feq(&b, srcs[0], srcs[1]); 1827 else 1828 result = nir_ieq(&b, srcs[0], srcs[1]); 1829 } else { 1830 result = nir_seq(&b, srcs[0], srcs[1]); 1831 } 1832 break; 1833 case ir_binop_nequal: 1834 if (supports_ints) { 1835 if (type_is_float(types[0])) 1836 result = nir_fne(&b, srcs[0], srcs[1]); 1837 else 1838 result = nir_ine(&b, srcs[0], srcs[1]); 1839 } else { 1840 result = nir_sne(&b, srcs[0], srcs[1]); 1841 } 1842 break; 1843 case ir_binop_all_equal: 1844 if (supports_ints) { 1845 if (type_is_float(types[0])) { 1846 switch (ir->operands[0]->type->vector_elements) { 1847 case 1: result = nir_feq(&b, srcs[0], srcs[1]); break; 1848 case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break; 1849 case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break; 1850 case 4: 
result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break; 1851 default: 1852 unreachable("not reached"); 1853 } 1854 } else { 1855 switch (ir->operands[0]->type->vector_elements) { 1856 case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break; 1857 case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break; 1858 case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break; 1859 case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break; 1860 default: 1861 unreachable("not reached"); 1862 } 1863 } 1864 } else { 1865 switch (ir->operands[0]->type->vector_elements) { 1866 case 1: result = nir_seq(&b, srcs[0], srcs[1]); break; 1867 case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break; 1868 case 3: result = nir_fall_equal3(&b, srcs[0], srcs[1]); break; 1869 case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break; 1870 default: 1871 unreachable("not reached"); 1872 } 1873 } 1874 break; 1875 case ir_binop_any_nequal: 1876 if (supports_ints) { 1877 if (type_is_float(types[0])) { 1878 switch (ir->operands[0]->type->vector_elements) { 1879 case 1: result = nir_fne(&b, srcs[0], srcs[1]); break; 1880 case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break; 1881 case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break; 1882 case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break; 1883 default: 1884 unreachable("not reached"); 1885 } 1886 } else { 1887 switch (ir->operands[0]->type->vector_elements) { 1888 case 1: result = nir_ine(&b, srcs[0], srcs[1]); break; 1889 case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break; 1890 case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break; 1891 case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break; 1892 default: 1893 unreachable("not reached"); 1894 } 1895 } 1896 } else { 1897 switch (ir->operands[0]->type->vector_elements) { 1898 case 1: result = nir_sne(&b, srcs[0], srcs[1]); break; 1899 case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break; 1900 case 3: 
result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break; 1901 case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break; 1902 default: 1903 unreachable("not reached"); 1904 } 1905 } 1906 break; 1907 case ir_binop_dot: 1908 switch (ir->operands[0]->type->vector_elements) { 1909 case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break; 1910 case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break; 1911 case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break; 1912 default: 1913 unreachable("not reached"); 1914 } 1915 break; 1916 1917 case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; 1918 case ir_triop_fma: 1919 result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); 1920 break; 1921 case ir_triop_lrp: 1922 result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]); 1923 break; 1924 case ir_triop_csel: 1925 if (supports_ints) 1926 result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]); 1927 else 1928 result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]); 1929 break; 1930 case ir_triop_bitfield_extract: 1931 result = (out_type == GLSL_TYPE_INT) ? 
         nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) :
         nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]);
      break;
   case ir_quadop_bitfield_insert:
      result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]);
      break;
   case ir_quadop_vector:
      /* Assemble a vector from the scalar sources. */
      result = nir_vec(&b, srcs, ir->type->vector_elements);
      break;

   default:
      unreachable("not reached");
   }
}

/* Lower a GLSL IR swizzle: evaluate the swizzled value and emit a NIR
 * swizzle of it.  The !supports_ints flag is forwarded to nir_swizzle
 * (matching how the rest of this pass switches between int/float paths).
 * The result lands in this->result, as for all rvalue visitors. */
void
nir_visitor::visit(ir_swizzle *ir)
{
   unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
   result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle,
                        ir->type->vector_elements, !supports_ints);
}

/* Lower a GLSL IR texture operation to a nir_tex_instr.
 *
 * Works in two phases: first pick the NIR texop and predict how many
 * sources the instruction will carry (coordinate, lod/bias, gradients,
 * projector, shadow comparator, offset, ...), then create the instruction
 * and fill the sources in.  The final assert checks the prediction against
 * the number of sources actually written. */
void
nir_visitor::visit(ir_texture *ir)
{
   unsigned num_srcs;
   nir_texop op;
   switch (ir->op) {
   case ir_tex:
      op = nir_texop_tex;
      num_srcs = 1; /* coordinate */
      break;

   case ir_txb:
   case ir_txl:
      op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
      num_srcs = 2; /* coordinate, bias/lod */
      break;

   case ir_txd:
      op = nir_texop_txd; /* coordinate, dPdx, dPdy */
      num_srcs = 3;
      break;

   case ir_txf:
      op = nir_texop_txf;
      if (ir->lod_info.lod != NULL)
         num_srcs = 2; /* coordinate, lod */
      else
         num_srcs = 1; /* coordinate */
      break;

   case ir_txf_ms:
      op = nir_texop_txf_ms;
      num_srcs = 2; /* coordinate, sample_index */
      break;

   case ir_txs:
      op = nir_texop_txs;
      if (ir->lod_info.lod != NULL)
         num_srcs = 1; /* lod */
      else
         num_srcs = 0;
      break;

   case ir_lod:
      op = nir_texop_lod;
      num_srcs = 1; /* coordinate */
      break;

   case ir_tg4:
      op = nir_texop_tg4;
      num_srcs = 1; /* coordinate */
      break;

   case ir_query_levels:
      op = nir_texop_query_levels;
      num_srcs = 0;
      break;

   case ir_texture_samples:
      op = nir_texop_texture_samples;
      num_srcs = 0;
      break;

   case ir_samples_identical:
      op = nir_texop_samples_identical;
      num_srcs = 1; /* coordinate */
      break;

   default:
      unreachable("not reached");
   }

   /* Optional sources shared by several texops. */
   if (ir->projector != NULL)
      num_srcs++;
   if (ir->shadow_comparator != NULL)
      num_srcs++;
   if (ir->offset != NULL)
      num_srcs++;

   nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);

   instr->op = op;
   instr->sampler_dim =
      (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
   instr->is_array = ir->sampler->type->sampler_array;
   instr->is_shadow = ir->sampler->type->sampler_shadow;
   if (instr->is_shadow)
      instr->is_new_style_shadow = (ir->type->vector_elements == 1);
   switch (ir->type->base_type) {
   case GLSL_TYPE_FLOAT:
      instr->dest_type = nir_type_float;
      break;
   case GLSL_TYPE_INT:
      instr->dest_type = nir_type_int;
      break;
   case GLSL_TYPE_BOOL:
   case GLSL_TYPE_UINT:
      /* Booleans are carried as unsigned integers in NIR here. */
      instr->dest_type = nir_type_uint;
      break;
   default:
      unreachable("not reached");
   }

   instr->texture = evaluate_deref(&instr->instr, ir->sampler);

   /* Fill the sources in a fixed order; src_number tracks the next slot. */
   unsigned src_number = 0;

   if (ir->coordinate != NULL) {
      instr->coord_components = ir->coordinate->type->vector_elements;
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->coordinate));
      instr->src[src_number].src_type = nir_tex_src_coord;
      src_number++;
   }

   if (ir->projector != NULL) {
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->projector));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (ir->shadow_comparator != NULL) {
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparator));
      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   if (ir->offset != NULL) {
      /* we don't support multiple offsets yet */
      assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());

      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->offset));
      instr->src[src_number].src_type = nir_tex_src_offset;
      src_number++;
   }

   /* Per-opcode extra sources (bias, lod, gradients, sample index). */
   switch (ir->op) {
   case ir_txb:
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
      break;

   case ir_txl:
   case ir_txf:
   case ir_txs:
      if (ir->lod_info.lod != NULL) {
         instr->src[src_number].src =
            nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod));
         instr->src[src_number].src_type = nir_tex_src_lod;
         src_number++;
      }
      break;

   case ir_txd:
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx));
      instr->src[src_number].src_type = nir_tex_src_ddx;
      src_number++;
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy));
      instr->src[src_number].src_type = nir_tex_src_ddy;
      src_number++;
      break;

   case ir_txf_ms:
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index));
      instr->src[src_number].src_type = nir_tex_src_ms_index;
      src_number++;
      break;

   case ir_tg4:
      /* The gather component is required to be a constant in GLSL. */
      instr->component = ir->lod_info.component->as_constant()->value.u[0];
      break;

   default:
      break;
   }

   /* Every source predicted above must have been written. */
   assert(src_number == num_srcs);

   unsigned bit_size = glsl_get_bit_size(ir->type);
   add_instr(&instr->instr, nir_tex_instr_dest_size(instr), bit_size);
}

void
nir_visitor::visit(ir_constant *ir)
{
   /*
    * We don't know if this variable is an array or struct that gets
    * dereferenced, so do the safe thing and make it a variable with a
    * constant initializer and return a dereference.
    */

   /* Materialize the constant as a read-only local temporary and hand back
    * a dereference of it via the deref_head/deref_tail chain. */
   nir_variable *var =
      nir_local_variable_create(this->impl, ir->type, "const_temp");
   var->data.read_only = true;
   var->constant_initializer = constant_copy(ir, var);

   this->deref_head = nir_deref_var_create(this->shader, var);
   this->deref_tail = &this->deref_head->deref;
}

/* Start a new dereference chain at the nir_variable previously created
 * for this ir_variable (looked up in var_table). */
void
nir_visitor::visit(ir_dereference_variable *ir)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(this->var_table, ir->var);
   assert(entry); /* every referenced variable must have been visited */
   nir_variable *var = (nir_variable *) entry->data;

   nir_deref_var *deref = nir_deref_var_create(this->shader, var);
   this->deref_head = deref;
   this->deref_tail = &deref->deref;
}

/* Append a struct-field dereference: visit the base record first (which
 * rebuilds deref_head/deref_tail), then chain a field deref onto the tail. */
void
nir_visitor::visit(ir_dereference_record *ir)
{
   ir->record->accept(this);

   int field_index = ir->field_idx;
   assert(field_index >= 0);

   nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index);
   deref->deref.type = ir->type;
   this->deref_tail->child = &deref->deref;
   this->deref_tail = &deref->deref;
}

/* Append an array dereference.  A constant index becomes a direct deref
 * (base_offset); anything else becomes an indirect deref with an SSA index
 * source.  The base array is visited only after the index is evaluated;
 * it resets deref_head/deref_tail, and the new deref is then chained onto
 * the tail, with ralloc_steal reparenting it for memory ownership. */
void
nir_visitor::visit(ir_dereference_array *ir)
{
   nir_deref_array *deref = nir_deref_array_create(this->shader);
   deref->deref.type = ir->type;

   ir_constant *const_index = ir->array_index->as_constant();
   if (const_index != NULL) {
      deref->deref_array_type = nir_deref_array_type_direct;
      deref->base_offset = const_index->value.u[0];
   } else {
      deref->deref_array_type = nir_deref_array_type_indirect;
      deref->indirect =
         nir_src_for_ssa(evaluate_rvalue(ir->array_index));
   }

   ir->array->accept(this);

   this->deref_tail->child = &deref->deref;
   ralloc_steal(this->deref_tail, deref);
   this->deref_tail = &deref->deref;
}

/* Lower ir_barrier to the NIR barrier intrinsic, inserted at the
 * builder's current cursor. */
void
nir_visitor::visit(ir_barrier *)
{
   nir_intrinsic_instr *instr =
      nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier);
   nir_builder_instr_insert(&b, &instr->instr);
}