1 /* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright 2010 Intel Corporation 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 * DEALINGS IN THE SOFTWARE. 24 */ 25 26 /** 27 * \file ir_to_mesa.cpp 28 * 29 * Translate GLSL IR to Mesa's gl_program representation. 30 */ 31 32 #include <stdio.h> 33 #include "main/macros.h" 34 #include "main/mtypes.h" 35 #include "main/shaderapi.h" 36 #include "main/shaderobj.h" 37 #include "main/uniforms.h" 38 #include "compiler/glsl/ast.h" 39 #include "compiler/glsl/ir.h" 40 #include "compiler/glsl/ir_expression_flattening.h" 41 #include "compiler/glsl/ir_visitor.h" 42 #include "compiler/glsl/ir_optimization.h" 43 #include "compiler/glsl/ir_uniform.h" 44 #include "compiler/glsl/glsl_parser_extras.h" 45 #include "compiler/glsl_types.h" 46 #include "compiler/glsl/linker.h" 47 #include "compiler/glsl/program.h" 48 #include "compiler/glsl/shader_cache.h" 49 #include "compiler/glsl/string_to_uint_map.h" 50 #include "program/prog_instruction.h" 51 #include "program/prog_optimize.h" 52 #include "program/prog_print.h" 53 #include "program/program.h" 54 #include "program/prog_parameter.h" 55 56 57 static int swizzle_for_size(int size); 58 59 namespace { 60 61 class src_reg; 62 class dst_reg; 63 64 /** 65 * This struct is a corresponding struct to Mesa prog_src_register, with 66 * wider fields. 67 */ 68 class src_reg { 69 public: 70 src_reg(gl_register_file file, int index, const glsl_type *type) 71 { 72 this->file = file; 73 this->index = index; 74 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 75 this->swizzle = swizzle_for_size(type->vector_elements); 76 else 77 this->swizzle = SWIZZLE_XYZW; 78 this->negate = 0; 79 this->reladdr = NULL; 80 } 81 82 src_reg() 83 { 84 this->file = PROGRAM_UNDEFINED; 85 this->index = 0; 86 this->swizzle = 0; 87 this->negate = 0; 88 this->reladdr = NULL; 89 } 90 91 explicit src_reg(dst_reg reg); 92 93 gl_register_file file; /**< PROGRAM_* from Mesa */ 94 int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ 95 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 96 int negate; /**< NEGATE_XYZW mask from mesa */ 97 /** Register index should be offset by the integer in this reg. */ 98 src_reg *reladdr; 99 }; 100 101 class dst_reg { 102 public: 103 dst_reg(gl_register_file file, int writemask) 104 { 105 this->file = file; 106 this->index = 0; 107 this->writemask = writemask; 108 this->reladdr = NULL; 109 } 110 111 dst_reg() 112 { 113 this->file = PROGRAM_UNDEFINED; 114 this->index = 0; 115 this->writemask = 0; 116 this->reladdr = NULL; 117 } 118 119 explicit dst_reg(src_reg reg); 120 121 gl_register_file file; /**< PROGRAM_* from Mesa */ 122 int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ 123 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 124 /** Register index should be offset by the integer in this reg. */ 125 src_reg *reladdr; 126 }; 127 128 } /* anonymous namespace */ 129 130 src_reg::src_reg(dst_reg reg) 131 { 132 this->file = reg.file; 133 this->index = reg.index; 134 this->swizzle = SWIZZLE_XYZW; 135 this->negate = 0; 136 this->reladdr = reg.reladdr; 137 } 138 139 dst_reg::dst_reg(src_reg reg) 140 { 141 this->file = reg.file; 142 this->index = reg.index; 143 this->writemask = WRITEMASK_XYZW; 144 this->reladdr = reg.reladdr; 145 } 146 147 namespace { 148 149 class ir_to_mesa_instruction : public exec_node { 150 public: 151 DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction) 152 153 enum prog_opcode op; 154 dst_reg dst; 155 src_reg src[3]; 156 /** Pointer to the ir source this tree came from for debugging */ 157 ir_instruction *ir; 158 bool saturate; 159 int sampler; /**< sampler index */ 160 int tex_target; /**< One of TEXTURE_*_INDEX */ 161 GLboolean tex_shadow; 162 }; 163 164 class variable_storage : public exec_node { 165 public: 166 variable_storage(ir_variable *var, gl_register_file file, int index) 167 : file(file), index(index), var(var) 168 { 169 /* empty */ 170 } 171 172 gl_register_file file; 173 int index; 174 ir_variable *var; /* variable that maps to this, if any */ 175 }; 176 177 class function_entry : public exec_node { 178 public: 179 ir_function_signature *sig; 180 181 /** 182 * identifier of this function signature used by the program. 183 * 184 * At the point that Mesa instructions for function calls are 185 * generated, we don't know the address of the first instruction of 186 * the function body. So we make the BranchTarget that is called a 187 * small integer and rewrite them during set_branchtargets(). 188 */ 189 int sig_id; 190 191 /** 192 * Pointer to first instruction of the function body. 193 * 194 * Set during function body emits after main() is processed. 195 */ 196 ir_to_mesa_instruction *bgn_inst; 197 198 /** 199 * Index of the first instruction of the function body in actual 200 * Mesa IR. 201 * 202 * Set after convertion from ir_to_mesa_instruction to prog_instruction. 203 */ 204 int inst; 205 206 /** Storage for the return value. */ 207 src_reg return_reg; 208 }; 209 210 class ir_to_mesa_visitor : public ir_visitor { 211 public: 212 ir_to_mesa_visitor(); 213 ~ir_to_mesa_visitor(); 214 215 function_entry *current_function; 216 217 struct gl_context *ctx; 218 struct gl_program *prog; 219 struct gl_shader_program *shader_program; 220 struct gl_shader_compiler_options *options; 221 222 int next_temp; 223 224 variable_storage *find_variable_storage(const ir_variable *var); 225 226 src_reg get_temp(const glsl_type *type); 227 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); 228 229 src_reg src_reg_for_float(float val); 230 231 /** 232 * \name Visit methods 233 * 234 * As typical for the visitor pattern, there must be one \c visit method for 235 * each concrete subclass of \c ir_instruction. Virtual base classes within 236 * the hierarchy should not have \c visit methods. 237 */ 238 /*@{*/ 239 virtual void visit(ir_variable *); 240 virtual void visit(ir_loop *); 241 virtual void visit(ir_loop_jump *); 242 virtual void visit(ir_function_signature *); 243 virtual void visit(ir_function *); 244 virtual void visit(ir_expression *); 245 virtual void visit(ir_swizzle *); 246 virtual void visit(ir_dereference_variable *); 247 virtual void visit(ir_dereference_array *); 248 virtual void visit(ir_dereference_record *); 249 virtual void visit(ir_assignment *); 250 virtual void visit(ir_constant *); 251 virtual void visit(ir_call *); 252 virtual void visit(ir_return *); 253 virtual void visit(ir_discard *); 254 virtual void visit(ir_texture *); 255 virtual void visit(ir_if *); 256 virtual void visit(ir_emit_vertex *); 257 virtual void visit(ir_end_primitive *); 258 virtual void visit(ir_barrier *); 259 /*@}*/ 260 261 src_reg result; 262 263 /** List of variable_storage */ 264 exec_list variables; 265 266 /** List of function_entry */ 267 exec_list function_signatures; 268 int next_signature_id; 269 270 /** List of ir_to_mesa_instruction */ 271 exec_list instructions; 272 273 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op); 274 275 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 276 dst_reg dst, src_reg src0); 277 278 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 279 dst_reg dst, src_reg src0, src_reg src1); 280 281 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 282 dst_reg dst, 283 src_reg src0, src_reg src1, src_reg src2); 284 285 /** 286 * Emit the correct dot-product instruction for the type of arguments 287 */ 288 ir_to_mesa_instruction * emit_dp(ir_instruction *ir, 289 dst_reg dst, 290 src_reg src0, 291 src_reg src1, 292 unsigned elements); 293 294 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 295 dst_reg dst, src_reg src0); 296 297 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 298 dst_reg dst, src_reg src0, src_reg src1); 299 300 bool try_emit_mad(ir_expression *ir, 301 int mul_operand); 302 bool try_emit_mad_for_and_not(ir_expression *ir, 303 int mul_operand); 304 305 void emit_swz(ir_expression *ir); 306 307 void emit_equality_comparison(ir_expression *ir, enum prog_opcode op, 308 dst_reg dst, 309 const src_reg &src0, const src_reg &src1); 310 311 inline void emit_sne(ir_expression *ir, dst_reg dst, 312 const src_reg &src0, const src_reg &src1) 313 { 314 emit_equality_comparison(ir, OPCODE_SLT, dst, src0, src1); 315 } 316 317 inline void emit_seq(ir_expression *ir, dst_reg dst, 318 const src_reg &src0, const src_reg &src1) 319 { 320 emit_equality_comparison(ir, OPCODE_SGE, dst, src0, src1); 321 } 322 323 bool process_move_condition(ir_rvalue *ir); 324 325 void copy_propagate(void); 326 327 void *mem_ctx; 328 }; 329 330 } /* anonymous namespace */ 331 332 static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL); 333 334 static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); 335 336 static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); 337 338 static int 339 swizzle_for_size(int size) 340 { 341 static const int size_swizzles[4] = { 342 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 343 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 344 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 345 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 346 }; 347 348 assert((size >= 1) && (size <= 4)); 349 return size_swizzles[size - 1]; 350 } 351 352 ir_to_mesa_instruction * 353 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 354 dst_reg dst, 355 src_reg src0, src_reg src1, src_reg src2) 356 { 357 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction(); 358 int num_reladdr = 0; 359 360 /* If we have to do relative addressing, we want to load the ARL 361 * reg directly for one of the regs, and preload the other reladdr 362 * sources into temps. 363 */ 364 num_reladdr += dst.reladdr != NULL; 365 num_reladdr += src0.reladdr != NULL; 366 num_reladdr += src1.reladdr != NULL; 367 num_reladdr += src2.reladdr != NULL; 368 369 reladdr_to_temp(ir, &src2, &num_reladdr); 370 reladdr_to_temp(ir, &src1, &num_reladdr); 371 reladdr_to_temp(ir, &src0, &num_reladdr); 372 373 if (dst.reladdr) { 374 emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); 375 num_reladdr--; 376 } 377 assert(num_reladdr == 0); 378 379 inst->op = op; 380 inst->dst = dst; 381 inst->src[0] = src0; 382 inst->src[1] = src1; 383 inst->src[2] = src2; 384 inst->ir = ir; 385 386 this->instructions.push_tail(inst); 387 388 return inst; 389 } 390 391 392 ir_to_mesa_instruction * 393 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 394 dst_reg dst, src_reg src0, src_reg src1) 395 { 396 return emit(ir, op, dst, src0, src1, undef_src); 397 } 398 399 ir_to_mesa_instruction * 400 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 401 dst_reg dst, src_reg src0) 402 { 403 assert(dst.writemask != 0); 404 return emit(ir, op, dst, src0, undef_src, undef_src); 405 } 406 407 ir_to_mesa_instruction * 408 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) 409 { 410 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 411 } 412 413 ir_to_mesa_instruction * 414 ir_to_mesa_visitor::emit_dp(ir_instruction *ir, 415 dst_reg dst, src_reg src0, src_reg src1, 416 unsigned elements) 417 { 418 static const enum prog_opcode dot_opcodes[] = { 419 OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 420 }; 421 422 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 423 } 424 425 /** 426 * Emits Mesa scalar opcodes to produce unique answers across channels. 427 * 428 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X 429 * channel determines the result across all channels. So to do a vec4 430 * of this operation, we want to emit a scalar per source channel used 431 * to produce dest channels. 432 */ 433 void 434 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 435 dst_reg dst, 436 src_reg orig_src0, src_reg orig_src1) 437 { 438 int i, j; 439 int done_mask = ~dst.writemask; 440 441 /* Mesa RCP is a scalar operation splatting results to all channels, 442 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 443 * dst channels. 444 */ 445 for (i = 0; i < 4; i++) { 446 GLuint this_mask = (1 << i); 447 ir_to_mesa_instruction *inst; 448 src_reg src0 = orig_src0; 449 src_reg src1 = orig_src1; 450 451 if (done_mask & this_mask) 452 continue; 453 454 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 455 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 456 for (j = i + 1; j < 4; j++) { 457 /* If there is another enabled component in the destination that is 458 * derived from the same inputs, generate its value on this pass as 459 * well. 460 */ 461 if (!(done_mask & (1 << j)) && 462 GET_SWZ(src0.swizzle, j) == src0_swiz && 463 GET_SWZ(src1.swizzle, j) == src1_swiz) { 464 this_mask |= (1 << j); 465 } 466 } 467 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 468 src0_swiz, src0_swiz); 469 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 470 src1_swiz, src1_swiz); 471 472 inst = emit(ir, op, dst, src0, src1); 473 inst->dst.writemask = this_mask; 474 done_mask |= this_mask; 475 } 476 } 477 478 void 479 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 480 dst_reg dst, src_reg src0) 481 { 482 src_reg undef = undef_src; 483 484 undef.swizzle = SWIZZLE_XXXX; 485 486 emit_scalar(ir, op, dst, src0, undef); 487 } 488 489 src_reg 490 ir_to_mesa_visitor::src_reg_for_float(float val) 491 { 492 src_reg src(PROGRAM_CONSTANT, -1, NULL); 493 494 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 495 (const gl_constant_value *)&val, 1, &src.swizzle); 496 497 return src; 498 } 499 500 static int 501 storage_type_size(const struct glsl_type *type, bool bindless) 502 { 503 unsigned int i; 504 int size; 505 506 switch (type->base_type) { 507 case GLSL_TYPE_UINT: 508 case GLSL_TYPE_INT: 509 case GLSL_TYPE_UINT16: 510 case GLSL_TYPE_INT16: 511 case GLSL_TYPE_FLOAT: 512 case GLSL_TYPE_FLOAT16: 513 case GLSL_TYPE_BOOL: 514 if (type->is_matrix()) { 515 return type->matrix_columns; 516 } else { 517 /* Regardless of size of vector, it gets a vec4. This is bad 518 * packing for things like floats, but otherwise arrays become a 519 * mess. Hopefully a later pass over the code can pack scalars 520 * down if appropriate. 521 */ 522 return 1; 523 } 524 break; 525 case GLSL_TYPE_DOUBLE: 526 if (type->is_matrix()) { 527 if (type->vector_elements > 2) 528 return type->matrix_columns * 2; 529 else 530 return type->matrix_columns; 531 } else { 532 if (type->vector_elements > 2) 533 return 2; 534 else 535 return 1; 536 } 537 break; 538 case GLSL_TYPE_UINT64: 539 case GLSL_TYPE_INT64: 540 if (type->vector_elements > 2) 541 return 2; 542 else 543 return 1; 544 case GLSL_TYPE_ARRAY: 545 assert(type->length > 0); 546 return storage_type_size(type->fields.array, bindless) * type->length; 547 case GLSL_TYPE_STRUCT: 548 size = 0; 549 for (i = 0; i < type->length; i++) { 550 size += storage_type_size(type->fields.structure[i].type, bindless); 551 } 552 return size; 553 case GLSL_TYPE_SAMPLER: 554 case GLSL_TYPE_IMAGE: 555 if (!bindless) 556 return 0; 557 /* fall through */ 558 case GLSL_TYPE_SUBROUTINE: 559 return 1; 560 case GLSL_TYPE_ATOMIC_UINT: 561 case GLSL_TYPE_VOID: 562 case GLSL_TYPE_ERROR: 563 case GLSL_TYPE_INTERFACE: 564 case GLSL_TYPE_FUNCTION: 565 assert(!"Invalid type in type_size"); 566 break; 567 } 568 569 return 0; 570 } 571 572 static int 573 type_size(const struct glsl_type *type) 574 { 575 return storage_type_size(type, false); 576 } 577 578 /** 579 * In the initial pass of codegen, we assign temporary numbers to 580 * intermediate results. (not SSA -- variable assignments will reuse 581 * storage). Actual register allocation for the Mesa VM occurs in a 582 * pass over the Mesa IR later. 583 */ 584 src_reg 585 ir_to_mesa_visitor::get_temp(const glsl_type *type) 586 { 587 src_reg src; 588 589 src.file = PROGRAM_TEMPORARY; 590 src.index = next_temp; 591 src.reladdr = NULL; 592 next_temp += type_size(type); 593 594 if (type->is_array() || type->is_record()) { 595 src.swizzle = SWIZZLE_NOOP; 596 } else { 597 src.swizzle = swizzle_for_size(type->vector_elements); 598 } 599 src.negate = 0; 600 601 return src; 602 } 603 604 variable_storage * 605 ir_to_mesa_visitor::find_variable_storage(const ir_variable *var) 606 { 607 foreach_in_list(variable_storage, entry, &this->variables) { 608 if (entry->var == var) 609 return entry; 610 } 611 612 return NULL; 613 } 614 615 void 616 ir_to_mesa_visitor::visit(ir_variable *ir) 617 { 618 if (strcmp(ir->name, "gl_FragCoord") == 0) { 619 this->prog->OriginUpperLeft = ir->data.origin_upper_left; 620 this->prog->PixelCenterInteger = ir->data.pixel_center_integer; 621 } 622 623 if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 624 unsigned int i; 625 const ir_state_slot *const slots = ir->get_state_slots(); 626 assert(slots != NULL); 627 628 /* Check if this statevar's setup in the STATE file exactly 629 * matches how we'll want to reference it as a 630 * struct/array/whatever. If not, then we need to move it into 631 * temporary storage and hope that it'll get copy-propagated 632 * out. 633 */ 634 for (i = 0; i < ir->get_num_state_slots(); i++) { 635 if (slots[i].swizzle != SWIZZLE_XYZW) { 636 break; 637 } 638 } 639 640 variable_storage *storage; 641 dst_reg dst; 642 if (i == ir->get_num_state_slots()) { 643 /* We'll set the index later. */ 644 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 645 this->variables.push_tail(storage); 646 647 dst = undef_dst; 648 } else { 649 /* The variable_storage constructor allocates slots based on the size 650 * of the type. However, this had better match the number of state 651 * elements that we're going to copy into the new temporary. 652 */ 653 assert((int) ir->get_num_state_slots() == type_size(ir->type)); 654 655 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 656 this->next_temp); 657 this->variables.push_tail(storage); 658 this->next_temp += type_size(ir->type); 659 660 dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); 661 } 662 663 664 for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { 665 int index = _mesa_add_state_reference(this->prog->Parameters, 666 (gl_state_index *)slots[i].tokens); 667 668 if (storage->file == PROGRAM_STATE_VAR) { 669 if (storage->index == -1) { 670 storage->index = index; 671 } else { 672 assert(index == storage->index + (int)i); 673 } 674 } else { 675 src_reg src(PROGRAM_STATE_VAR, index, NULL); 676 src.swizzle = slots[i].swizzle; 677 emit(ir, OPCODE_MOV, dst, src); 678 /* even a float takes up a whole vec4 reg in a struct/array. */ 679 dst.index++; 680 } 681 } 682 683 if (storage->file == PROGRAM_TEMPORARY && 684 dst.index != storage->index + (int) ir->get_num_state_slots()) { 685 linker_error(this->shader_program, 686 "failed to load builtin uniform `%s' " 687 "(%d/%d regs loaded)\n", 688 ir->name, dst.index - storage->index, 689 type_size(ir->type)); 690 } 691 } 692 } 693 694 void 695 ir_to_mesa_visitor::visit(ir_loop *ir) 696 { 697 emit(NULL, OPCODE_BGNLOOP); 698 699 visit_exec_list(&ir->body_instructions, this); 700 701 emit(NULL, OPCODE_ENDLOOP); 702 } 703 704 void 705 ir_to_mesa_visitor::visit(ir_loop_jump *ir) 706 { 707 switch (ir->mode) { 708 case ir_loop_jump::jump_break: 709 emit(NULL, OPCODE_BRK); 710 break; 711 case ir_loop_jump::jump_continue: 712 emit(NULL, OPCODE_CONT); 713 break; 714 } 715 } 716 717 718 void 719 ir_to_mesa_visitor::visit(ir_function_signature *ir) 720 { 721 assert(0); 722 (void)ir; 723 } 724 725 void 726 ir_to_mesa_visitor::visit(ir_function *ir) 727 { 728 /* Ignore function bodies other than main() -- we shouldn't see calls to 729 * them since they should all be inlined before we get to ir_to_mesa. 730 */ 731 if (strcmp(ir->name, "main") == 0) { 732 const ir_function_signature *sig; 733 exec_list empty; 734 735 sig = ir->matching_signature(NULL, &empty, false); 736 737 assert(sig); 738 739 foreach_in_list(ir_instruction, ir, &sig->body) { 740 ir->accept(this); 741 } 742 } 743 } 744 745 bool 746 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 747 { 748 int nonmul_operand = 1 - mul_operand; 749 src_reg a, b, c; 750 751 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 752 if (!expr || expr->operation != ir_binop_mul) 753 return false; 754 755 expr->operands[0]->accept(this); 756 a = this->result; 757 expr->operands[1]->accept(this); 758 b = this->result; 759 ir->operands[nonmul_operand]->accept(this); 760 c = this->result; 761 762 this->result = get_temp(ir->type); 763 emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c); 764 765 return true; 766 } 767 768 /** 769 * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) 770 * 771 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 772 * implemented using multiplication, and logical-or is implemented using 773 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 774 * As result, the logical expression (a & !b) can be rewritten as: 775 * 776 * - a * !b 777 * - a * (1 - b) 778 * - (a * 1) - (a * b) 779 * - a + -(a * b) 780 * - a + (a * -b) 781 * 782 * This final expression can be implemented as a single MAD(a, -b, a) 783 * instruction. 784 */ 785 bool 786 ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 787 { 788 const int other_operand = 1 - try_operand; 789 src_reg a, b; 790 791 ir_expression *expr = ir->operands[try_operand]->as_expression(); 792 if (!expr || expr->operation != ir_unop_logic_not) 793 return false; 794 795 ir->operands[other_operand]->accept(this); 796 a = this->result; 797 expr->operands[0]->accept(this); 798 b = this->result; 799 800 b.negate = ~b.negate; 801 802 this->result = get_temp(ir->type); 803 emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); 804 805 return true; 806 } 807 808 void 809 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, 810 src_reg *reg, int *num_reladdr) 811 { 812 if (!reg->reladdr) 813 return; 814 815 emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); 816 817 if (*num_reladdr != 1) { 818 src_reg temp = get_temp(glsl_type::vec4_type); 819 820 emit(ir, OPCODE_MOV, dst_reg(temp), *reg); 821 *reg = temp; 822 } 823 824 (*num_reladdr)--; 825 } 826 827 void 828 ir_to_mesa_visitor::emit_swz(ir_expression *ir) 829 { 830 /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. 831 * This means that each of the operands is either an immediate value of -1, 832 * 0, or 1, or is a component from one source register (possibly with 833 * negation). 834 */ 835 uint8_t components[4] = { 0 }; 836 bool negate[4] = { false }; 837 ir_variable *var = NULL; 838 839 for (unsigned i = 0; i < ir->type->vector_elements; i++) { 840 ir_rvalue *op = ir->operands[i]; 841 842 assert(op->type->is_scalar()); 843 844 while (op != NULL) { 845 switch (op->ir_type) { 846 case ir_type_constant: { 847 848 assert(op->type->is_scalar()); 849 850 const ir_constant *const c = op->as_constant(); 851 if (c->is_one()) { 852 components[i] = SWIZZLE_ONE; 853 } else if (c->is_zero()) { 854 components[i] = SWIZZLE_ZERO; 855 } else if (c->is_negative_one()) { 856 components[i] = SWIZZLE_ONE; 857 negate[i] = true; 858 } else { 859 assert(!"SWZ constant must be 0.0 or 1.0."); 860 } 861 862 op = NULL; 863 break; 864 } 865 866 case ir_type_dereference_variable: { 867 ir_dereference_variable *const deref = 868 (ir_dereference_variable *) op; 869 870 assert((var == NULL) || (deref->var == var)); 871 components[i] = SWIZZLE_X; 872 var = deref->var; 873 op = NULL; 874 break; 875 } 876 877 case ir_type_expression: { 878 ir_expression *const expr = (ir_expression *) op; 879 880 assert(expr->operation == ir_unop_neg); 881 negate[i] = true; 882 883 op = expr->operands[0]; 884 break; 885 } 886 887 case ir_type_swizzle: { 888 ir_swizzle *const swiz = (ir_swizzle *) op; 889 890 components[i] = swiz->mask.x; 891 op = swiz->val; 892 break; 893 } 894 895 default: 896 assert(!"Should not get here."); 897 return; 898 } 899 } 900 } 901 902 assert(var != NULL); 903 904 ir_dereference_variable *const deref = 905 new(mem_ctx) ir_dereference_variable(var); 906 907 this->result.file = PROGRAM_UNDEFINED; 908 deref->accept(this); 909 if (this->result.file == PROGRAM_UNDEFINED) { 910 printf("Failed to get tree for expression operand:\n"); 911 deref->print(); 912 printf("\n"); 913 exit(1); 914 } 915 916 src_reg src; 917 918 src = this->result; 919 src.swizzle = MAKE_SWIZZLE4(components[0], 920 components[1], 921 components[2], 922 components[3]); 923 src.negate = ((unsigned(negate[0]) << 0) 924 | (unsigned(negate[1]) << 1) 925 | (unsigned(negate[2]) << 2) 926 | (unsigned(negate[3]) << 3)); 927 928 /* Storage for our result. Ideally for an assignment we'd be using the 929 * actual storage for the result here, instead. 930 */ 931 const src_reg result_src = get_temp(ir->type); 932 dst_reg result_dst = dst_reg(result_src); 933 934 /* Limit writes to the channels that will be used by result_src later. 935 * This does limit this temp's use as a temporary for multi-instruction 936 * sequences. 937 */ 938 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 939 940 emit(ir, OPCODE_SWZ, result_dst, src); 941 this->result = result_src; 942 } 943 944 void 945 ir_to_mesa_visitor::emit_equality_comparison(ir_expression *ir, 946 enum prog_opcode op, 947 dst_reg dst, 948 const src_reg &src0, 949 const src_reg &src1) 950 { 951 src_reg difference; 952 src_reg abs_difference = get_temp(glsl_type::vec4_type); 953 const src_reg zero = src_reg_for_float(0.0); 954 955 /* x == y is equivalent to -abs(x-y) >= 0. Since all of the code that 956 * consumes the generated IR is pretty dumb, take special care when one 957 * of the operands is zero. 958 * 959 * Similarly, x != y is equivalent to -abs(x-y) < 0. 960 */ 961 if (src0.file == zero.file && 962 src0.index == zero.index && 963 src0.swizzle == zero.swizzle) { 964 difference = src1; 965 } else if (src1.file == zero.file && 966 src1.index == zero.index && 967 src1.swizzle == zero.swizzle) { 968 difference = src0; 969 } else { 970 difference = get_temp(glsl_type::vec4_type); 971 972 src_reg tmp_src = src0; 973 tmp_src.negate = ~tmp_src.negate; 974 975 emit(ir, OPCODE_ADD, dst_reg(difference), tmp_src, src1); 976 } 977 978 emit(ir, OPCODE_ABS, dst_reg(abs_difference), difference); 979 980 abs_difference.negate = ~abs_difference.negate; 981 emit(ir, op, dst, abs_difference, zero); 982 } 983 984 void 985 ir_to_mesa_visitor::visit(ir_expression *ir) 986 { 987 unsigned int operand; 988 src_reg op[ARRAY_SIZE(ir->operands)]; 989 src_reg result_src; 990 dst_reg result_dst; 991 992 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) 993 */ 994 if (ir->operation == ir_binop_add) { 995 if (try_emit_mad(ir, 1)) 996 return; 997 if (try_emit_mad(ir, 0)) 998 return; 999 } 1000 1001 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1002 */ 1003 if (ir->operation == ir_binop_logic_and) { 1004 if (try_emit_mad_for_and_not(ir, 1)) 1005 return; 1006 if (try_emit_mad_for_and_not(ir, 0)) 1007 return; 1008 } 1009 1010 if (ir->operation == ir_quadop_vector) { 1011 this->emit_swz(ir); 1012 return; 1013 } 1014 1015 for (operand = 0; operand < ir->num_operands; operand++) { 1016 this->result.file = PROGRAM_UNDEFINED; 1017 ir->operands[operand]->accept(this); 1018 if (this->result.file == PROGRAM_UNDEFINED) { 1019 printf("Failed to get tree for expression operand:\n"); 1020 ir->operands[operand]->print(); 1021 printf("\n"); 1022 exit(1); 1023 } 1024 op[operand] = this->result; 1025 1026 /* Matrix expression operands should have been broken down to vector 1027 * operations already. 1028 */ 1029 assert(!ir->operands[operand]->type->is_matrix()); 1030 } 1031 1032 int vector_elements = ir->operands[0]->type->vector_elements; 1033 if (ir->operands[1]) { 1034 vector_elements = MAX2(vector_elements, 1035 ir->operands[1]->type->vector_elements); 1036 } 1037 1038 this->result.file = PROGRAM_UNDEFINED; 1039 1040 /* Storage for our result. Ideally for an assignment we'd be using 1041 * the actual storage for the result here, instead. 1042 */ 1043 result_src = get_temp(ir->type); 1044 /* convenience for the emit functions below. */ 1045 result_dst = dst_reg(result_src); 1046 /* Limit writes to the channels that will be used by result_src later. 1047 * This does limit this temp's use as a temporary for multi-instruction 1048 * sequences. 1049 */ 1050 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1051 1052 switch (ir->operation) { 1053 case ir_unop_logic_not: 1054 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1055 * older GPUs implement SEQ using multiple instructions (i915 uses two 1056 * SGE instructions and a MUL instruction). Since our logic values are 1057 * 0.0 and 1.0, 1-x also implements !x. 1058 */ 1059 op[0].negate = ~op[0].negate; 1060 emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); 1061 break; 1062 case ir_unop_neg: 1063 op[0].negate = ~op[0].negate; 1064 result_src = op[0]; 1065 break; 1066 case ir_unop_abs: 1067 emit(ir, OPCODE_ABS, result_dst, op[0]); 1068 break; 1069 case ir_unop_sign: 1070 emit(ir, OPCODE_SSG, result_dst, op[0]); 1071 break; 1072 case ir_unop_rcp: 1073 emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); 1074 break; 1075 1076 case ir_unop_exp2: 1077 emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); 1078 break; 1079 case ir_unop_exp: 1080 assert(!"not reached: should be handled by exp_to_exp2"); 1081 break; 1082 case ir_unop_log: 1083 assert(!"not reached: should be handled by log_to_log2"); 1084 break; 1085 case ir_unop_log2: 1086 emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); 1087 break; 1088 case ir_unop_sin: 1089 emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); 1090 break; 1091 case ir_unop_cos: 1092 emit_scalar(ir, OPCODE_COS, result_dst, op[0]); 1093 break; 1094 1095 case ir_unop_dFdx: 1096 emit(ir, OPCODE_DDX, result_dst, op[0]); 1097 break; 1098 case ir_unop_dFdy: 1099 emit(ir, OPCODE_DDY, result_dst, op[0]); 1100 break; 1101 1102 case ir_unop_saturate: { 1103 ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV, 1104 result_dst, op[0]); 1105 inst->saturate = true; 1106 break; 1107 } 1108 case ir_unop_noise: { 1109 const enum prog_opcode opcode = 1110 prog_opcode(OPCODE_NOISE1 1111 + (ir->operands[0]->type->vector_elements) - 1); 1112 assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); 1113 1114 emit(ir, opcode, result_dst, op[0]); 1115 break; 1116 } 1117 1118 case ir_binop_add: 1119 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1120 break; 1121 case ir_binop_sub: 1122 emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); 1123 break; 1124 1125 case ir_binop_mul: 1126 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1127 break; 1128 case ir_binop_div: 1129 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1130 break; 1131 case ir_binop_mod: 1132 /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ 1133 assert(ir->type->is_integer()); 1134 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1135 break; 1136 1137 case ir_binop_less: 1138 emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); 1139 break; 1140 case ir_binop_gequal: 1141 emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); 1142 break; 1143 case ir_binop_equal: 1144 emit_seq(ir, result_dst, op[0], op[1]); 1145 break; 1146 case ir_binop_nequal: 1147 emit_sne(ir, result_dst, op[0], op[1]); 1148 break; 1149 case ir_binop_all_equal: 1150 /* "==" operator producing a scalar boolean. */ 1151 if (ir->operands[0]->type->is_vector() || 1152 ir->operands[1]->type->is_vector()) { 1153 src_reg temp = get_temp(glsl_type::vec4_type); 1154 emit_sne(ir, dst_reg(temp), op[0], op[1]); 1155 1156 /* After the dot-product, the value will be an integer on the 1157 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1158 */ 1159 emit_dp(ir, result_dst, temp, temp, vector_elements); 1160 1161 /* Negating the result of the dot-product gives values on the range 1162 * [-4, 0]. Zero becomes 1.0, and negative values become zero. This 1163 * achieved using SGE. 1164 */ 1165 src_reg sge_src = result_src; 1166 sge_src.negate = ~sge_src.negate; 1167 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); 1168 } else { 1169 emit_seq(ir, result_dst, op[0], op[1]); 1170 } 1171 break; 1172 case ir_binop_any_nequal: 1173 /* "!=" operator producing a scalar boolean. */ 1174 if (ir->operands[0]->type->is_vector() || 1175 ir->operands[1]->type->is_vector()) { 1176 src_reg temp = get_temp(glsl_type::vec4_type); 1177 if (ir->operands[0]->type->is_boolean() && 1178 ir->operands[1]->as_constant() && 1179 ir->operands[1]->as_constant()->is_zero()) { 1180 temp = op[0]; 1181 } else { 1182 emit_sne(ir, dst_reg(temp), op[0], op[1]); 1183 } 1184 1185 /* After the dot-product, the value will be an integer on the 1186 * range [0,4]. Zero stays zero, and positive values become 1.0. 1187 */ 1188 ir_to_mesa_instruction *const dp = 1189 emit_dp(ir, result_dst, temp, temp, vector_elements); 1190 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1191 /* The clamping to [0,1] can be done for free in the fragment 1192 * shader with a saturate. 1193 */ 1194 dp->saturate = true; 1195 } else { 1196 /* Negating the result of the dot-product gives values on the range 1197 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1198 * achieved using SLT. 1199 */ 1200 src_reg slt_src = result_src; 1201 slt_src.negate = ~slt_src.negate; 1202 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); 1203 } 1204 } else { 1205 emit_sne(ir, result_dst, op[0], op[1]); 1206 } 1207 break; 1208 1209 case ir_binop_logic_xor: 1210 emit_sne(ir, result_dst, op[0], op[1]); 1211 break; 1212 1213 case ir_binop_logic_or: { 1214 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1215 /* After the addition, the value will be an integer on the 1216 * range [0,2]. Zero stays zero, and positive values become 1.0. 1217 */ 1218 ir_to_mesa_instruction *add = 1219 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1220 add->saturate = true; 1221 } else { 1222 /* The Boolean arguments are stored as float 0.0 and 1.0. If either 1223 * value is 1.0, the result of the logcal-or should be 1.0. If both 1224 * values are 0.0, the result should be 0.0. This is exactly what 1225 * MAX does. 1226 */ 1227 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 1228 } 1229 break; 1230 } 1231 1232 case ir_binop_logic_and: 1233 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ 1234 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1235 break; 1236 1237 case ir_binop_dot: 1238 assert(ir->operands[0]->type->is_vector()); 1239 assert(ir->operands[0]->type == ir->operands[1]->type); 1240 emit_dp(ir, result_dst, op[0], op[1], 1241 ir->operands[0]->type->vector_elements); 1242 break; 1243 1244 case ir_unop_sqrt: 1245 /* sqrt(x) = x * rsq(x). */ 1246 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1247 emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); 1248 /* For incoming channels <= 0, set the result to 0. */ 1249 op[0].negate = ~op[0].negate; 1250 emit(ir, OPCODE_CMP, result_dst, 1251 op[0], result_src, src_reg_for_float(0.0)); 1252 break; 1253 case ir_unop_rsq: 1254 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1255 break; 1256 case ir_unop_i2f: 1257 case ir_unop_u2f: 1258 case ir_unop_b2f: 1259 case ir_unop_b2i: 1260 case ir_unop_i2u: 1261 case ir_unop_u2i: 1262 /* Mesa IR lacks types, ints are stored as truncated floats. */ 1263 result_src = op[0]; 1264 break; 1265 case ir_unop_f2i: 1266 case ir_unop_f2u: 1267 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1268 break; 1269 case ir_unop_f2b: 1270 case ir_unop_i2b: 1271 emit_sne(ir, result_dst, op[0], src_reg_for_float(0.0)); 1272 break; 1273 case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway 1274 case ir_unop_bitcast_f2u: 1275 case ir_unop_bitcast_i2f: 1276 case ir_unop_bitcast_u2f: 1277 break; 1278 case ir_unop_trunc: 1279 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1280 break; 1281 case ir_unop_ceil: 1282 op[0].negate = ~op[0].negate; 1283 emit(ir, OPCODE_FLR, result_dst, op[0]); 1284 result_src.negate = ~result_src.negate; 1285 break; 1286 case ir_unop_floor: 1287 emit(ir, OPCODE_FLR, result_dst, op[0]); 1288 break; 1289 case ir_unop_fract: 1290 emit(ir, OPCODE_FRC, result_dst, op[0]); 1291 break; 1292 case ir_unop_pack_snorm_2x16: 1293 case ir_unop_pack_snorm_4x8: 1294 case ir_unop_pack_unorm_2x16: 1295 case ir_unop_pack_unorm_4x8: 1296 case ir_unop_pack_half_2x16: 1297 case ir_unop_pack_double_2x32: 1298 case ir_unop_unpack_snorm_2x16: 1299 case ir_unop_unpack_snorm_4x8: 1300 case ir_unop_unpack_unorm_2x16: 1301 case ir_unop_unpack_unorm_4x8: 1302 case ir_unop_unpack_half_2x16: 1303 case ir_unop_unpack_double_2x32: 1304 case ir_unop_bitfield_reverse: 1305 case ir_unop_bit_count: 1306 case ir_unop_find_msb: 1307 case ir_unop_find_lsb: 1308 case ir_unop_d2f: 1309 case ir_unop_f2d: 1310 case ir_unop_d2i: 1311 case ir_unop_i2d: 1312 case ir_unop_d2u: 1313 case ir_unop_u2d: 1314 case ir_unop_d2b: 1315 case ir_unop_frexp_sig: 1316 case ir_unop_frexp_exp: 1317 assert(!"not supported"); 1318 break; 1319 case ir_binop_min: 1320 emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); 1321 break; 1322 case ir_binop_max: 1323 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 1324 break; 1325 case ir_binop_pow: 1326 emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); 1327 break; 1328 1329 /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since 1330 * hardware backends have no way to avoid Mesa IR generation 1331 * even if they don't use it, we need to emit "something" and 1332 * continue. 1333 */ 1334 case ir_binop_lshift: 1335 case ir_binop_rshift: 1336 case ir_binop_bit_and: 1337 case ir_binop_bit_xor: 1338 case ir_binop_bit_or: 1339 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1340 break; 1341 1342 case ir_unop_bit_not: 1343 case ir_unop_round_even: 1344 emit(ir, OPCODE_MOV, result_dst, op[0]); 1345 break; 1346 1347 case ir_binop_ubo_load: 1348 assert(!"not supported"); 1349 break; 1350 1351 case ir_triop_lrp: 1352 /* ir_triop_lrp operands are (x, y, a) while 1353 * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program. 1354 */ 1355 emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]); 1356 break; 1357 1358 case ir_triop_csel: 1359 /* We assume that boolean true and false are 1.0 and 0.0. OPCODE_CMP 1360 * selects src1 if src0 is < 0, src2 otherwise. 1361 */ 1362 op[0].negate = ~op[0].negate; 1363 emit(ir, OPCODE_CMP, result_dst, op[0], op[1], op[2]); 1364 break; 1365 1366 case ir_binop_vector_extract: 1367 case ir_triop_fma: 1368 case ir_triop_bitfield_extract: 1369 case ir_triop_vector_insert: 1370 case ir_quadop_bitfield_insert: 1371 case ir_binop_ldexp: 1372 case ir_binop_carry: 1373 case ir_binop_borrow: 1374 case ir_binop_imul_high: 1375 case ir_unop_interpolate_at_centroid: 1376 case ir_binop_interpolate_at_offset: 1377 case ir_binop_interpolate_at_sample: 1378 case ir_unop_dFdx_coarse: 1379 case ir_unop_dFdx_fine: 1380 case ir_unop_dFdy_coarse: 1381 case ir_unop_dFdy_fine: 1382 case ir_unop_subroutine_to_int: 1383 case ir_unop_get_buffer_size: 1384 case ir_unop_bitcast_u642d: 1385 case ir_unop_bitcast_i642d: 1386 case ir_unop_bitcast_d2u64: 1387 case ir_unop_bitcast_d2i64: 1388 case ir_unop_i642i: 1389 case ir_unop_u642i: 1390 case ir_unop_i642u: 1391 case ir_unop_u642u: 1392 case ir_unop_i642b: 1393 case ir_unop_i642f: 1394 case ir_unop_u642f: 1395 case ir_unop_i642d: 1396 case ir_unop_u642d: 1397 case ir_unop_i2i64: 1398 case ir_unop_u2i64: 1399 case ir_unop_b2i64: 1400 case ir_unop_f2i64: 1401 case ir_unop_d2i64: 1402 case ir_unop_i2u64: 1403 case ir_unop_u2u64: 1404 case ir_unop_f2u64: 1405 case ir_unop_d2u64: 1406 case ir_unop_u642i64: 1407 case ir_unop_i642u64: 1408 case ir_unop_pack_int_2x32: 1409 case ir_unop_unpack_int_2x32: 1410 case ir_unop_pack_uint_2x32: 1411 case ir_unop_unpack_uint_2x32: 1412 case ir_unop_pack_sampler_2x32: 1413 case ir_unop_unpack_sampler_2x32: 1414 case ir_unop_pack_image_2x32: 1415 case ir_unop_unpack_image_2x32: 1416 assert(!"not supported"); 1417 break; 1418 1419 case ir_unop_ssbo_unsized_array_length: 1420 case ir_quadop_vector: 1421 /* This operation should have already been handled. 1422 */ 1423 assert(!"Should not get here."); 1424 break; 1425 } 1426 1427 this->result = result_src; 1428 } 1429 1430 1431 void 1432 ir_to_mesa_visitor::visit(ir_swizzle *ir) 1433 { 1434 src_reg src; 1435 int i; 1436 int swizzle[4]; 1437 1438 /* Note that this is only swizzles in expressions, not those on the left 1439 * hand side of an assignment, which do write masking. See ir_assignment 1440 * for that. 1441 */ 1442 1443 ir->val->accept(this); 1444 src = this->result; 1445 assert(src.file != PROGRAM_UNDEFINED); 1446 assert(ir->type->vector_elements > 0); 1447 1448 for (i = 0; i < 4; i++) { 1449 if (i < ir->type->vector_elements) { 1450 switch (i) { 1451 case 0: 1452 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1453 break; 1454 case 1: 1455 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1456 break; 1457 case 2: 1458 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1459 break; 1460 case 3: 1461 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1462 break; 1463 } 1464 } else { 1465 /* If the type is smaller than a vec4, replicate the last 1466 * channel out. 1467 */ 1468 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1469 } 1470 } 1471 1472 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1473 1474 this->result = src; 1475 } 1476 1477 void 1478 ir_to_mesa_visitor::visit(ir_dereference_variable *ir) 1479 { 1480 variable_storage *entry = find_variable_storage(ir->var); 1481 ir_variable *var = ir->var; 1482 1483 if (!entry) { 1484 switch (var->data.mode) { 1485 case ir_var_uniform: 1486 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1487 var->data.param_index); 1488 this->variables.push_tail(entry); 1489 break; 1490 case ir_var_shader_in: 1491 /* The linker assigns locations for varyings and attributes, 1492 * including deprecated builtins (like gl_Color), 1493 * user-assigned generic attributes (glBindVertexLocation), 1494 * and user-defined varyings. 1495 */ 1496 assert(var->data.location != -1); 1497 entry = new(mem_ctx) variable_storage(var, 1498 PROGRAM_INPUT, 1499 var->data.location); 1500 break; 1501 case ir_var_shader_out: 1502 assert(var->data.location != -1); 1503 entry = new(mem_ctx) variable_storage(var, 1504 PROGRAM_OUTPUT, 1505 var->data.location); 1506 break; 1507 case ir_var_system_value: 1508 entry = new(mem_ctx) variable_storage(var, 1509 PROGRAM_SYSTEM_VALUE, 1510 var->data.location); 1511 break; 1512 case ir_var_auto: 1513 case ir_var_temporary: 1514 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1515 this->next_temp); 1516 this->variables.push_tail(entry); 1517 1518 next_temp += type_size(var->type); 1519 break; 1520 } 1521 1522 if (!entry) { 1523 printf("Failed to make storage for %s\n", var->name); 1524 exit(1); 1525 } 1526 } 1527 1528 this->result = src_reg(entry->file, entry->index, var->type); 1529 } 1530 1531 void 1532 ir_to_mesa_visitor::visit(ir_dereference_array *ir) 1533 { 1534 ir_constant *index; 1535 src_reg src; 1536 int element_size = type_size(ir->type); 1537 1538 index = ir->array_index->constant_expression_value(ralloc_parent(ir)); 1539 1540 ir->array->accept(this); 1541 src = this->result; 1542 1543 if (index) { 1544 src.index += index->value.i[0] * element_size; 1545 } else { 1546 /* Variable index array dereference. It eats the "vec4" of the 1547 * base of the array and an index that offsets the Mesa register 1548 * index. 1549 */ 1550 ir->array_index->accept(this); 1551 1552 src_reg index_reg; 1553 1554 if (element_size == 1) { 1555 index_reg = this->result; 1556 } else { 1557 index_reg = get_temp(glsl_type::float_type); 1558 1559 emit(ir, OPCODE_MUL, dst_reg(index_reg), 1560 this->result, src_reg_for_float(element_size)); 1561 } 1562 1563 /* If there was already a relative address register involved, add the 1564 * new and the old together to get the new offset. 1565 */ 1566 if (src.reladdr != NULL) { 1567 src_reg accum_reg = get_temp(glsl_type::float_type); 1568 1569 emit(ir, OPCODE_ADD, dst_reg(accum_reg), 1570 index_reg, *src.reladdr); 1571 1572 index_reg = accum_reg; 1573 } 1574 1575 src.reladdr = ralloc(mem_ctx, src_reg); 1576 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1577 } 1578 1579 /* If the type is smaller than a vec4, replicate the last channel out. */ 1580 if (ir->type->is_scalar() || ir->type->is_vector()) 1581 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1582 else 1583 src.swizzle = SWIZZLE_NOOP; 1584 1585 this->result = src; 1586 } 1587 1588 void 1589 ir_to_mesa_visitor::visit(ir_dereference_record *ir) 1590 { 1591 unsigned int i; 1592 const glsl_type *struct_type = ir->record->type; 1593 int offset = 0; 1594 1595 ir->record->accept(this); 1596 1597 assert(ir->field_idx >= 0); 1598 for (i = 0; i < struct_type->length; i++) { 1599 if (i == (unsigned) ir->field_idx) 1600 break; 1601 offset += type_size(struct_type->fields.structure[i].type); 1602 } 1603 1604 /* If the type is smaller than a vec4, replicate the last channel out. */ 1605 if (ir->type->is_scalar() || ir->type->is_vector()) 1606 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1607 else 1608 this->result.swizzle = SWIZZLE_NOOP; 1609 1610 this->result.index += offset; 1611 } 1612 1613 /** 1614 * We want to be careful in assignment setup to hit the actual storage 1615 * instead of potentially using a temporary like we might with the 1616 * ir_dereference handler. 1617 */ 1618 static dst_reg 1619 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v) 1620 { 1621 /* The LHS must be a dereference. If the LHS is a variable indexed array 1622 * access of a vector, it must be separated into a series conditional moves 1623 * before reaching this point (see ir_vec_index_to_cond_assign). 1624 */ 1625 assert(ir->as_dereference()); 1626 ir_dereference_array *deref_array = ir->as_dereference_array(); 1627 if (deref_array) { 1628 assert(!deref_array->array->type->is_vector()); 1629 } 1630 1631 /* Use the rvalue deref handler for the most part. We'll ignore 1632 * swizzles in it and write swizzles using writemask, though. 1633 */ 1634 ir->accept(v); 1635 return dst_reg(v->result); 1636 } 1637 1638 /* Calculate the sampler index and also calculate the base uniform location 1639 * for struct members. 1640 */ 1641 static void 1642 calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref, 1643 unsigned *offset, unsigned *array_elements, 1644 unsigned *location) 1645 { 1646 if (deref->ir_type == ir_type_dereference_variable) 1647 return; 1648 1649 switch (deref->ir_type) { 1650 case ir_type_dereference_array: { 1651 ir_dereference_array *deref_arr = deref->as_dereference_array(); 1652 1653 void *mem_ctx = ralloc_parent(deref_arr); 1654 ir_constant *array_index = 1655 deref_arr->array_index->constant_expression_value(mem_ctx); 1656 1657 if (!array_index) { 1658 /* GLSL 1.10 and 1.20 allowed variable sampler array indices, 1659 * while GLSL 1.30 requires that the array indices be 1660 * constant integer expressions. We don't expect any driver 1661 * to actually work with a really variable array index, so 1662 * all that would work would be an unrolled loop counter that ends 1663 * up being constant above. 1664 */ 1665 ralloc_strcat(&prog->data->InfoLog, 1666 "warning: Variable sampler array index unsupported.\n" 1667 "This feature of the language was removed in GLSL 1.20 " 1668 "and is unlikely to be supported for 1.10 in Mesa.\n"); 1669 } else { 1670 *offset += array_index->value.u[0] * *array_elements; 1671 } 1672 1673 *array_elements *= deref_arr->array->type->length; 1674 1675 calc_sampler_offsets(prog, deref_arr->array->as_dereference(), 1676 offset, array_elements, location); 1677 break; 1678 } 1679 1680 case ir_type_dereference_record: { 1681 ir_dereference_record *deref_record = deref->as_dereference_record(); 1682 unsigned field_index = deref_record->field_idx; 1683 *location += 1684 deref_record->record->type->record_location_offset(field_index); 1685 calc_sampler_offsets(prog, deref_record->record->as_dereference(), 1686 offset, array_elements, location); 1687 break; 1688 } 1689 1690 default: 1691 unreachable("Invalid deref type"); 1692 break; 1693 } 1694 } 1695 1696 static int 1697 get_sampler_uniform_value(class ir_dereference *sampler, 1698 struct gl_shader_program *shader_program, 1699 const struct gl_program *prog) 1700 { 1701 GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); 1702 ir_variable *var = sampler->variable_referenced(); 1703 unsigned location = var->data.location; 1704 unsigned array_elements = 1; 1705 unsigned offset = 0; 1706 1707 calc_sampler_offsets(shader_program, sampler, &offset, &array_elements, 1708 &location); 1709 1710 assert(shader_program->data->UniformStorage[location].opaque[shader].active); 1711 return shader_program->data->UniformStorage[location].opaque[shader].index + 1712 offset; 1713 } 1714 1715 /** 1716 * Process the condition of a conditional assignment 1717 * 1718 * Examines the condition of a conditional assignment to generate the optimal 1719 * first operand of a \c CMP instruction. If the condition is a relational 1720 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 1721 * used as the source for the \c CMP instruction. Otherwise the comparison 1722 * is processed to a boolean result, and the boolean result is used as the 1723 * operand to the CMP instruction. 1724 */ 1725 bool 1726 ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir) 1727 { 1728 ir_rvalue *src_ir = ir; 1729 bool negate = true; 1730 bool switch_order = false; 1731 1732 ir_expression *const expr = ir->as_expression(); 1733 if ((expr != NULL) && (expr->num_operands == 2)) { 1734 bool zero_on_left = false; 1735 1736 if (expr->operands[0]->is_zero()) { 1737 src_ir = expr->operands[1]; 1738 zero_on_left = true; 1739 } else if (expr->operands[1]->is_zero()) { 1740 src_ir = expr->operands[0]; 1741 zero_on_left = false; 1742 } 1743 1744 /* a is - 0 + - 0 + 1745 * (a < 0) T F F ( a < 0) T F F 1746 * (0 < a) F F T (-a < 0) F F T 1747 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 1748 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 1749 * 1750 * Note that exchanging the order of 0 and 'a' in the comparison simply 1751 * means that the value of 'a' should be negated. 1752 */ 1753 if (src_ir != ir) { 1754 switch (expr->operation) { 1755 case ir_binop_less: 1756 switch_order = false; 1757 negate = zero_on_left; 1758 break; 1759 1760 case ir_binop_gequal: 1761 switch_order = true; 1762 negate = zero_on_left; 1763 break; 1764 1765 default: 1766 /* This isn't the right kind of comparison afterall, so make sure 1767 * the whole condition is visited. 1768 */ 1769 src_ir = ir; 1770 break; 1771 } 1772 } 1773 } 1774 1775 src_ir->accept(this); 1776 1777 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 1778 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 1779 * choose which value OPCODE_CMP produces without an extra instruction 1780 * computing the condition. 1781 */ 1782 if (negate) 1783 this->result.negate = ~this->result.negate; 1784 1785 return switch_order; 1786 } 1787 1788 void 1789 ir_to_mesa_visitor::visit(ir_assignment *ir) 1790 { 1791 dst_reg l; 1792 src_reg r; 1793 int i; 1794 1795 ir->rhs->accept(this); 1796 r = this->result; 1797 1798 l = get_assignment_lhs(ir->lhs, this); 1799 1800 /* FINISHME: This should really set to the correct maximal writemask for each 1801 * FINISHME: component written (in the loops below). This case can only 1802 * FINISHME: occur for matrices, arrays, and structures. 1803 */ 1804 if (ir->write_mask == 0) { 1805 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1806 l.writemask = WRITEMASK_XYZW; 1807 } else if (ir->lhs->type->is_scalar()) { 1808 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 1809 * FINISHME: W component of fragment shader output zero, work correctly. 1810 */ 1811 l.writemask = WRITEMASK_XYZW; 1812 } else { 1813 int swizzles[4]; 1814 int first_enabled_chan = 0; 1815 int rhs_chan = 0; 1816 1817 assert(ir->lhs->type->is_vector()); 1818 l.writemask = ir->write_mask; 1819 1820 for (int i = 0; i < 4; i++) { 1821 if (l.writemask & (1 << i)) { 1822 first_enabled_chan = GET_SWZ(r.swizzle, i); 1823 break; 1824 } 1825 } 1826 1827 /* Swizzle a small RHS vector into the channels being written. 1828 * 1829 * glsl ir treats write_mask as dictating how many channels are 1830 * present on the RHS while Mesa IR treats write_mask as just 1831 * showing which channels of the vec4 RHS get written. 1832 */ 1833 for (int i = 0; i < 4; i++) { 1834 if (l.writemask & (1 << i)) 1835 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 1836 else 1837 swizzles[i] = first_enabled_chan; 1838 } 1839 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 1840 swizzles[2], swizzles[3]); 1841 } 1842 1843 assert(l.file != PROGRAM_UNDEFINED); 1844 assert(r.file != PROGRAM_UNDEFINED); 1845 1846 if (ir->condition) { 1847 const bool switch_order = this->process_move_condition(ir->condition); 1848 src_reg condition = this->result; 1849 1850 for (i = 0; i < type_size(ir->lhs->type); i++) { 1851 if (switch_order) { 1852 emit(ir, OPCODE_CMP, l, condition, src_reg(l), r); 1853 } else { 1854 emit(ir, OPCODE_CMP, l, condition, r, src_reg(l)); 1855 } 1856 1857 l.index++; 1858 r.index++; 1859 } 1860 } else { 1861 for (i = 0; i < type_size(ir->lhs->type); i++) { 1862 emit(ir, OPCODE_MOV, l, r); 1863 l.index++; 1864 r.index++; 1865 } 1866 } 1867 } 1868 1869 1870 void 1871 ir_to_mesa_visitor::visit(ir_constant *ir) 1872 { 1873 src_reg src; 1874 GLfloat stack_vals[4] = { 0 }; 1875 GLfloat *values = stack_vals; 1876 unsigned int i; 1877 1878 /* Unfortunately, 4 floats is all we can get into 1879 * _mesa_add_unnamed_constant. So, make a temp to store an 1880 * aggregate constant and move each constant value into it. If we 1881 * get lucky, copy propagation will eliminate the extra moves. 1882 */ 1883 1884 if (ir->type->is_record()) { 1885 src_reg temp_base = get_temp(ir->type); 1886 dst_reg temp = dst_reg(temp_base); 1887 1888 for (i = 0; i < ir->type->length; i++) { 1889 ir_constant *const field_value = ir->get_record_field(i); 1890 int size = type_size(field_value->type); 1891 1892 assert(size > 0); 1893 1894 field_value->accept(this); 1895 src = this->result; 1896 1897 for (unsigned j = 0; j < (unsigned int)size; j++) { 1898 emit(ir, OPCODE_MOV, temp, src); 1899 1900 src.index++; 1901 temp.index++; 1902 } 1903 } 1904 this->result = temp_base; 1905 return; 1906 } 1907 1908 if (ir->type->is_array()) { 1909 src_reg temp_base = get_temp(ir->type); 1910 dst_reg temp = dst_reg(temp_base); 1911 int size = type_size(ir->type->fields.array); 1912 1913 assert(size > 0); 1914 1915 for (i = 0; i < ir->type->length; i++) { 1916 ir->const_elements[i]->accept(this); 1917 src = this->result; 1918 for (int j = 0; j < size; j++) { 1919 emit(ir, OPCODE_MOV, temp, src); 1920 1921 src.index++; 1922 temp.index++; 1923 } 1924 } 1925 this->result = temp_base; 1926 return; 1927 } 1928 1929 if (ir->type->is_matrix()) { 1930 src_reg mat = get_temp(ir->type); 1931 dst_reg mat_column = dst_reg(mat); 1932 1933 for (i = 0; i < ir->type->matrix_columns; i++) { 1934 assert(ir->type->is_float()); 1935 values = &ir->value.f[i * ir->type->vector_elements]; 1936 1937 src = src_reg(PROGRAM_CONSTANT, -1, NULL); 1938 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1939 (gl_constant_value *) values, 1940 ir->type->vector_elements, 1941 &src.swizzle); 1942 emit(ir, OPCODE_MOV, mat_column, src); 1943 1944 mat_column.index++; 1945 } 1946 1947 this->result = mat; 1948 return; 1949 } 1950 1951 src.file = PROGRAM_CONSTANT; 1952 switch (ir->type->base_type) { 1953 case GLSL_TYPE_FLOAT: 1954 values = &ir->value.f[0]; 1955 break; 1956 case GLSL_TYPE_UINT: 1957 for (i = 0; i < ir->type->vector_elements; i++) { 1958 values[i] = ir->value.u[i]; 1959 } 1960 break; 1961 case GLSL_TYPE_INT: 1962 for (i = 0; i < ir->type->vector_elements; i++) { 1963 values[i] = ir->value.i[i]; 1964 } 1965 break; 1966 case GLSL_TYPE_BOOL: 1967 for (i = 0; i < ir->type->vector_elements; i++) { 1968 values[i] = ir->value.b[i]; 1969 } 1970 break; 1971 default: 1972 assert(!"Non-float/uint/int/bool constant"); 1973 } 1974 1975 this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); 1976 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1977 (gl_constant_value *) values, 1978 ir->type->vector_elements, 1979 &this->result.swizzle); 1980 } 1981 1982 void 1983 ir_to_mesa_visitor::visit(ir_call *) 1984 { 1985 assert(!"ir_to_mesa: All function calls should have been inlined by now."); 1986 } 1987 1988 void 1989 ir_to_mesa_visitor::visit(ir_texture *ir) 1990 { 1991 src_reg result_src, coord, lod_info, projector, dx, dy; 1992 dst_reg result_dst, coord_dst; 1993 ir_to_mesa_instruction *inst = NULL; 1994 prog_opcode opcode = OPCODE_NOP; 1995 1996 if (ir->op == ir_txs) 1997 this->result = src_reg_for_float(0.0); 1998 else 1999 ir->coordinate->accept(this); 2000 2001 /* Put our coords in a temp. We'll need to modify them for shadow, 2002 * projection, or LOD, so the only case we'd use it as-is is if 2003 * we're doing plain old texturing. Mesa IR optimization should 2004 * handle cleaning up our mess in that case. 2005 */ 2006 coord = get_temp(glsl_type::vec4_type); 2007 coord_dst = dst_reg(coord); 2008 emit(ir, OPCODE_MOV, coord_dst, this->result); 2009 2010 if (ir->projector) { 2011 ir->projector->accept(this); 2012 projector = this->result; 2013 } 2014 2015 /* Storage for our result. Ideally for an assignment we'd be using 2016 * the actual storage for the result here, instead. 2017 */ 2018 result_src = get_temp(glsl_type::vec4_type); 2019 result_dst = dst_reg(result_src); 2020 2021 switch (ir->op) { 2022 case ir_tex: 2023 case ir_txs: 2024 opcode = OPCODE_TEX; 2025 break; 2026 case ir_txb: 2027 opcode = OPCODE_TXB; 2028 ir->lod_info.bias->accept(this); 2029 lod_info = this->result; 2030 break; 2031 case ir_txf: 2032 /* Pretend to be TXL so the sampler, coordinate, lod are available */ 2033 case ir_txl: 2034 opcode = OPCODE_TXL; 2035 ir->lod_info.lod->accept(this); 2036 lod_info = this->result; 2037 break; 2038 case ir_txd: 2039 opcode = OPCODE_TXD; 2040 ir->lod_info.grad.dPdx->accept(this); 2041 dx = this->result; 2042 ir->lod_info.grad.dPdy->accept(this); 2043 dy = this->result; 2044 break; 2045 case ir_txf_ms: 2046 assert(!"Unexpected ir_txf_ms opcode"); 2047 break; 2048 case ir_lod: 2049 assert(!"Unexpected ir_lod opcode"); 2050 break; 2051 case ir_tg4: 2052 assert(!"Unexpected ir_tg4 opcode"); 2053 break; 2054 case ir_query_levels: 2055 assert(!"Unexpected ir_query_levels opcode"); 2056 break; 2057 case ir_samples_identical: 2058 unreachable("Unexpected ir_samples_identical opcode"); 2059 case ir_texture_samples: 2060 unreachable("Unexpected ir_texture_samples opcode"); 2061 } 2062 2063 const glsl_type *sampler_type = ir->sampler->type; 2064 2065 if (ir->projector) { 2066 if (opcode == OPCODE_TEX) { 2067 /* Slot the projector in as the last component of the coord. */ 2068 coord_dst.writemask = WRITEMASK_W; 2069 emit(ir, OPCODE_MOV, coord_dst, projector); 2070 coord_dst.writemask = WRITEMASK_XYZW; 2071 opcode = OPCODE_TXP; 2072 } else { 2073 src_reg coord_w = coord; 2074 coord_w.swizzle = SWIZZLE_WWWW; 2075 2076 /* For the other TEX opcodes there's no projective version 2077 * since the last slot is taken up by lod info. Do the 2078 * projective divide now. 2079 */ 2080 coord_dst.writemask = WRITEMASK_W; 2081 emit(ir, OPCODE_RCP, coord_dst, projector); 2082 2083 /* In the case where we have to project the coordinates "by hand," 2084 * the shadow comparator value must also be projected. 2085 */ 2086 src_reg tmp_src = coord; 2087 if (ir->shadow_comparator) { 2088 /* Slot the shadow value in as the second to last component of the 2089 * coord. 2090 */ 2091 ir->shadow_comparator->accept(this); 2092 2093 tmp_src = get_temp(glsl_type::vec4_type); 2094 dst_reg tmp_dst = dst_reg(tmp_src); 2095 2096 /* Projective division not allowed for array samplers. */ 2097 assert(!sampler_type->sampler_array); 2098 2099 tmp_dst.writemask = WRITEMASK_Z; 2100 emit(ir, OPCODE_MOV, tmp_dst, this->result); 2101 2102 tmp_dst.writemask = WRITEMASK_XY; 2103 emit(ir, OPCODE_MOV, tmp_dst, coord); 2104 } 2105 2106 coord_dst.writemask = WRITEMASK_XYZ; 2107 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); 2108 2109 coord_dst.writemask = WRITEMASK_XYZW; 2110 coord.swizzle = SWIZZLE_XYZW; 2111 } 2112 } 2113 2114 /* If projection is done and the opcode is not OPCODE_TXP, then the shadow 2115 * comparator was put in the correct place (and projected) by the code, 2116 * above, that handles by-hand projection. 2117 */ 2118 if (ir->shadow_comparator && (!ir->projector || opcode == OPCODE_TXP)) { 2119 /* Slot the shadow value in as the second to last component of the 2120 * coord. 2121 */ 2122 ir->shadow_comparator->accept(this); 2123 2124 /* XXX This will need to be updated for cubemap array samplers. */ 2125 if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2126 sampler_type->sampler_array) { 2127 coord_dst.writemask = WRITEMASK_W; 2128 } else { 2129 coord_dst.writemask = WRITEMASK_Z; 2130 } 2131 2132 emit(ir, OPCODE_MOV, coord_dst, this->result); 2133 coord_dst.writemask = WRITEMASK_XYZW; 2134 } 2135 2136 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { 2137 /* Mesa IR stores lod or lod bias in the last channel of the coords. */ 2138 coord_dst.writemask = WRITEMASK_W; 2139 emit(ir, OPCODE_MOV, coord_dst, lod_info); 2140 coord_dst.writemask = WRITEMASK_XYZW; 2141 } 2142 2143 if (opcode == OPCODE_TXD) 2144 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2145 else 2146 inst = emit(ir, opcode, result_dst, coord); 2147 2148 if (ir->shadow_comparator) 2149 inst->tex_shadow = GL_TRUE; 2150 2151 inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program, 2152 prog); 2153 2154 switch (sampler_type->sampler_dimensionality) { 2155 case GLSL_SAMPLER_DIM_1D: 2156 inst->tex_target = (sampler_type->sampler_array) 2157 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2158 break; 2159 case GLSL_SAMPLER_DIM_2D: 2160 inst->tex_target = (sampler_type->sampler_array) 2161 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2162 break; 2163 case GLSL_SAMPLER_DIM_3D: 2164 inst->tex_target = TEXTURE_3D_INDEX; 2165 break; 2166 case GLSL_SAMPLER_DIM_CUBE: 2167 inst->tex_target = TEXTURE_CUBE_INDEX; 2168 break; 2169 case GLSL_SAMPLER_DIM_RECT: 2170 inst->tex_target = TEXTURE_RECT_INDEX; 2171 break; 2172 case GLSL_SAMPLER_DIM_BUF: 2173 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2174 break; 2175 case GLSL_SAMPLER_DIM_EXTERNAL: 2176 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 2177 break; 2178 default: 2179 assert(!"Should not get here."); 2180 } 2181 2182 this->result = result_src; 2183 } 2184 2185 void 2186 ir_to_mesa_visitor::visit(ir_return *ir) 2187 { 2188 /* Non-void functions should have been inlined. We may still emit RETs 2189 * from main() unless the EmitNoMainReturn option is set. 2190 */ 2191 assert(!ir->get_value()); 2192 emit(ir, OPCODE_RET); 2193 } 2194 2195 void 2196 ir_to_mesa_visitor::visit(ir_discard *ir) 2197 { 2198 if (!ir->condition) 2199 ir->condition = new(mem_ctx) ir_constant(true); 2200 2201 ir->condition->accept(this); 2202 this->result.negate = ~this->result.negate; 2203 emit(ir, OPCODE_KIL, undef_dst, this->result); 2204 } 2205 2206 void 2207 ir_to_mesa_visitor::visit(ir_if *ir) 2208 { 2209 ir_to_mesa_instruction *if_inst; 2210 2211 ir->condition->accept(this); 2212 assert(this->result.file != PROGRAM_UNDEFINED); 2213 2214 if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); 2215 2216 this->instructions.push_tail(if_inst); 2217 2218 visit_exec_list(&ir->then_instructions, this); 2219 2220 if (!ir->else_instructions.is_empty()) { 2221 emit(ir->condition, OPCODE_ELSE); 2222 visit_exec_list(&ir->else_instructions, this); 2223 } 2224 2225 emit(ir->condition, OPCODE_ENDIF); 2226 } 2227 2228 void 2229 ir_to_mesa_visitor::visit(ir_emit_vertex *) 2230 { 2231 assert(!"Geometry shaders not supported."); 2232 } 2233 2234 void 2235 ir_to_mesa_visitor::visit(ir_end_primitive *) 2236 { 2237 assert(!"Geometry shaders not supported."); 2238 } 2239 2240 void 2241 ir_to_mesa_visitor::visit(ir_barrier *) 2242 { 2243 unreachable("GLSL barrier() not supported."); 2244 } 2245 2246 ir_to_mesa_visitor::ir_to_mesa_visitor() 2247 { 2248 result.file = PROGRAM_UNDEFINED; 2249 next_temp = 1; 2250 next_signature_id = 1; 2251 current_function = NULL; 2252 mem_ctx = ralloc_context(NULL); 2253 } 2254 2255 ir_to_mesa_visitor::~ir_to_mesa_visitor() 2256 { 2257 ralloc_free(mem_ctx); 2258 } 2259 2260 static struct prog_src_register 2261 mesa_src_reg_from_ir_src_reg(src_reg reg) 2262 { 2263 struct prog_src_register mesa_reg; 2264 2265 mesa_reg.File = reg.file; 2266 assert(reg.index < (1 << INST_INDEX_BITS)); 2267 mesa_reg.Index = reg.index; 2268 mesa_reg.Swizzle = reg.swizzle; 2269 mesa_reg.RelAddr = reg.reladdr != NULL; 2270 mesa_reg.Negate = reg.negate; 2271 2272 return mesa_reg; 2273 } 2274 2275 static void 2276 set_branchtargets(ir_to_mesa_visitor *v, 2277 struct prog_instruction *mesa_instructions, 2278 int num_instructions) 2279 { 2280 int if_count = 0, loop_count = 0; 2281 int *if_stack, *loop_stack; 2282 int if_stack_pos = 0, loop_stack_pos = 0; 2283 int i, j; 2284 2285 for (i = 0; i < num_instructions; i++) { 2286 switch (mesa_instructions[i].Opcode) { 2287 case OPCODE_IF: 2288 if_count++; 2289 break; 2290 case OPCODE_BGNLOOP: 2291 loop_count++; 2292 break; 2293 case OPCODE_BRK: 2294 case OPCODE_CONT: 2295 mesa_instructions[i].BranchTarget = -1; 2296 break; 2297 default: 2298 break; 2299 } 2300 } 2301 2302 if_stack = rzalloc_array(v->mem_ctx, int, if_count); 2303 loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); 2304 2305 for (i = 0; i < num_instructions; i++) { 2306 switch (mesa_instructions[i].Opcode) { 2307 case OPCODE_IF: 2308 if_stack[if_stack_pos] = i; 2309 if_stack_pos++; 2310 break; 2311 case OPCODE_ELSE: 2312 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; 2313 if_stack[if_stack_pos - 1] = i; 2314 break; 2315 case OPCODE_ENDIF: 2316 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; 2317 if_stack_pos--; 2318 break; 2319 case OPCODE_BGNLOOP: 2320 loop_stack[loop_stack_pos] = i; 2321 loop_stack_pos++; 2322 break; 2323 case OPCODE_ENDLOOP: 2324 loop_stack_pos--; 2325 /* Rewrite any breaks/conts at this nesting level (haven't 2326 * already had a BranchTarget assigned) to point to the end 2327 * of the loop. 2328 */ 2329 for (j = loop_stack[loop_stack_pos]; j < i; j++) { 2330 if (mesa_instructions[j].Opcode == OPCODE_BRK || 2331 mesa_instructions[j].Opcode == OPCODE_CONT) { 2332 if (mesa_instructions[j].BranchTarget == -1) { 2333 mesa_instructions[j].BranchTarget = i; 2334 } 2335 } 2336 } 2337 /* The loop ends point at each other. */ 2338 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; 2339 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; 2340 break; 2341 case OPCODE_CAL: 2342 foreach_in_list(function_entry, entry, &v->function_signatures) { 2343 if (entry->sig_id == mesa_instructions[i].BranchTarget) { 2344 mesa_instructions[i].BranchTarget = entry->inst; 2345 break; 2346 } 2347 } 2348 break; 2349 default: 2350 break; 2351 } 2352 } 2353 } 2354 2355 static void 2356 print_program(struct prog_instruction *mesa_instructions, 2357 ir_instruction **mesa_instruction_annotation, 2358 int num_instructions) 2359 { 2360 ir_instruction *last_ir = NULL; 2361 int i; 2362 int indent = 0; 2363 2364 for (i = 0; i < num_instructions; i++) { 2365 struct prog_instruction *mesa_inst = mesa_instructions + i; 2366 ir_instruction *ir = mesa_instruction_annotation[i]; 2367 2368 fprintf(stdout, "%3d: ", i); 2369 2370 if (last_ir != ir && ir) { 2371 int j; 2372 2373 for (j = 0; j < indent; j++) { 2374 fprintf(stdout, " "); 2375 } 2376 ir->print(); 2377 printf("\n"); 2378 last_ir = ir; 2379 2380 fprintf(stdout, " "); /* line number spacing. */ 2381 } 2382 2383 indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, 2384 PROG_PRINT_DEBUG, NULL); 2385 } 2386 } 2387 2388 namespace { 2389 2390 class add_uniform_to_shader : public program_resource_visitor { 2391 public: 2392 add_uniform_to_shader(struct gl_context *ctx, 2393 struct gl_shader_program *shader_program, 2394 struct gl_program_parameter_list *params) 2395 : ctx(ctx), params(params), idx(-1) 2396 { 2397 /* empty */ 2398 } 2399 2400 void process(ir_variable *var) 2401 { 2402 this->idx = -1; 2403 this->var = var; 2404 this->program_resource_visitor::process(var, 2405 ctx->Const.UseSTD430AsDefaultPacking); 2406 var->data.param_index = this->idx; 2407 } 2408 2409 private: 2410 virtual void visit_field(const glsl_type *type, const char *name, 2411 bool row_major, const glsl_type *record_type, 2412 const enum glsl_interface_packing packing, 2413 bool last_field); 2414 2415 struct gl_context *ctx; 2416 struct gl_program_parameter_list *params; 2417 int idx; 2418 ir_variable *var; 2419 }; 2420 2421 } /* anonymous namespace */ 2422 2423 void 2424 add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, 2425 bool /* row_major */, 2426 const glsl_type * /* record_type */, 2427 const enum glsl_interface_packing, 2428 bool /* last_field */) 2429 { 2430 /* opaque types don't use storage in the param list unless they are 2431 * bindless samplers or images. 2432 */ 2433 if (type->contains_opaque() && !var->data.bindless) 2434 return; 2435 2436 /* Add the uniform to the param list */ 2437 assert(_mesa_lookup_parameter_index(params, name) < 0); 2438 int index = _mesa_lookup_parameter_index(params, name); 2439 2440 unsigned num_params = type->arrays_of_arrays_size(); 2441 num_params = MAX2(num_params, 1); 2442 num_params *= type->without_array()->matrix_columns; 2443 2444 bool is_dual_slot = type->without_array()->is_dual_slot(); 2445 if (is_dual_slot) 2446 num_params *= 2; 2447 2448 _mesa_reserve_parameter_storage(params, num_params); 2449 index = params->NumParameters; 2450 for (unsigned i = 0; i < num_params; i++) { 2451 unsigned comps = 4; 2452 _mesa_add_parameter(params, PROGRAM_UNIFORM, name, comps, 2453 type->gl_type, NULL, NULL); 2454 } 2455 2456 /* The first part of the uniform that's processed determines the base 2457 * location of the whole uniform (for structures). 2458 */ 2459 if (this->idx < 0) 2460 this->idx = index; 2461 } 2462 2463 /** 2464 * Generate the program parameters list for the user uniforms in a shader 2465 * 2466 * \param shader_program Linked shader program. This is only used to 2467 * emit possible link errors to the info log. 2468 * \param sh Shader whose uniforms are to be processed. 2469 * \param params Parameter list to be filled in. 2470 */ 2471 void 2472 _mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, 2473 struct gl_shader_program 2474 *shader_program, 2475 struct gl_linked_shader *sh, 2476 struct gl_program_parameter_list 2477 *params) 2478 { 2479 add_uniform_to_shader add(ctx, shader_program, params); 2480 2481 foreach_in_list(ir_instruction, node, sh->ir) { 2482 ir_variable *var = node->as_variable(); 2483 2484 if ((var == NULL) || (var->data.mode != ir_var_uniform) 2485 || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0)) 2486 continue; 2487 2488 add.process(var); 2489 } 2490 } 2491 2492 void 2493 _mesa_associate_uniform_storage(struct gl_context *ctx, 2494 struct gl_shader_program *shader_program, 2495 struct gl_program *prog, 2496 bool propagate_to_storage) 2497 { 2498 struct gl_program_parameter_list *params = prog->Parameters; 2499 gl_shader_stage shader_type = prog->info.stage; 2500 2501 /* After adding each uniform to the parameter list, connect the storage for 2502 * the parameter with the tracking structure used by the API for the 2503 * uniform. 2504 */ 2505 unsigned last_location = unsigned(~0); 2506 for (unsigned i = 0; i < params->NumParameters; i++) { 2507 if (params->Parameters[i].Type != PROGRAM_UNIFORM) 2508 continue; 2509 2510 unsigned location; 2511 const bool found = 2512 shader_program->UniformHash->get(location, params->Parameters[i].Name); 2513 assert(found); 2514 2515 if (!found) 2516 continue; 2517 2518 struct gl_uniform_storage *storage = 2519 &shader_program->data->UniformStorage[location]; 2520 2521 /* Do not associate any uniform storage to built-in uniforms */ 2522 if (storage->builtin) 2523 continue; 2524 2525 if (location != last_location) { 2526 enum gl_uniform_driver_format format = uniform_native; 2527 unsigned columns = 0; 2528 int dmul = 4 * sizeof(float); 2529 2530 switch (storage->type->base_type) { 2531 case GLSL_TYPE_UINT64: 2532 if (storage->type->vector_elements > 2) 2533 dmul *= 2; 2534 /* fallthrough */ 2535 case GLSL_TYPE_UINT: 2536 case GLSL_TYPE_UINT16: 2537 assert(ctx->Const.NativeIntegers); 2538 format = uniform_native; 2539 columns = 1; 2540 break; 2541 case GLSL_TYPE_INT64: 2542 if (storage->type->vector_elements > 2) 2543 dmul *= 2; 2544 /* fallthrough */ 2545 case GLSL_TYPE_INT: 2546 case GLSL_TYPE_INT16: 2547 format = 2548 (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float; 2549 columns = 1; 2550 break; 2551 case GLSL_TYPE_DOUBLE: 2552 if (storage->type->vector_elements > 2) 2553 dmul *= 2; 2554 /* fallthrough */ 2555 case GLSL_TYPE_FLOAT: 2556 case GLSL_TYPE_FLOAT16: 2557 format = uniform_native; 2558 columns = storage->type->matrix_columns; 2559 break; 2560 case GLSL_TYPE_BOOL: 2561 format = uniform_native; 2562 columns = 1; 2563 break; 2564 case GLSL_TYPE_SAMPLER: 2565 case GLSL_TYPE_IMAGE: 2566 case GLSL_TYPE_SUBROUTINE: 2567 format = uniform_native; 2568 columns = 1; 2569 break; 2570 case GLSL_TYPE_ATOMIC_UINT: 2571 case GLSL_TYPE_ARRAY: 2572 case GLSL_TYPE_VOID: 2573 case GLSL_TYPE_STRUCT: 2574 case GLSL_TYPE_ERROR: 2575 case GLSL_TYPE_INTERFACE: 2576 case GLSL_TYPE_FUNCTION: 2577 assert(!"Should not get here."); 2578 break; 2579 } 2580 2581 _mesa_uniform_attach_driver_storage(storage, dmul * columns, dmul, 2582 format, 2583 ¶ms->ParameterValues[i]); 2584 2585 /* When a bindless sampler/image is bound to a texture/image unit, we 2586 * have to overwrite the constant value by the resident handle 2587 * directly in the constant buffer before the next draw. One solution 2588 * is to keep track a pointer to the base of the data. 2589 */ 2590 if (storage->is_bindless && (prog->sh.NumBindlessSamplers || 2591 prog->sh.NumBindlessImages)) { 2592 unsigned array_elements = MAX2(1, storage->array_elements); 2593 2594 for (unsigned j = 0; j < array_elements; ++j) { 2595 unsigned unit = storage->opaque[shader_type].index + j; 2596 2597 if (storage->type->without_array()->is_sampler()) { 2598 assert(unit >= 0 && unit < prog->sh.NumBindlessSamplers); 2599 prog->sh.BindlessSamplers[unit].data = 2600 ¶ms->ParameterValues[i] + j; 2601 } else if (storage->type->without_array()->is_image()) { 2602 assert(unit >= 0 && unit < prog->sh.NumBindlessImages); 2603 prog->sh.BindlessImages[unit].data = 2604 ¶ms->ParameterValues[i] + j; 2605 } 2606 } 2607 } 2608 2609 /* After attaching the driver's storage to the uniform, propagate any 2610 * data from the linker's backing store. This will cause values from 2611 * initializers in the source code to be copied over. 2612 */ 2613 if (propagate_to_storage) { 2614 unsigned array_elements = MAX2(1, storage->array_elements); 2615 _mesa_propagate_uniforms_to_driver_storage(storage, 0, 2616 array_elements); 2617 } 2618 2619 last_location = location; 2620 } 2621 } 2622 } 2623 2624 /* 2625 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 2626 * channels for copy propagation and updates following instructions to 2627 * use the original versions. 2628 * 2629 * The ir_to_mesa_visitor lazily produces code assuming that this pass 2630 * will occur. As an example, a TXP production before this pass: 2631 * 2632 * 0: MOV TEMP[1], INPUT[4].xyyy; 2633 * 1: MOV TEMP[1].w, INPUT[4].wwww; 2634 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 2635 * 2636 * and after: 2637 * 2638 * 0: MOV TEMP[1], INPUT[4].xyyy; 2639 * 1: MOV TEMP[1].w, INPUT[4].wwww; 2640 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 2641 * 2642 * which allows for dead code elimination on TEMP[1]'s writes. 2643 */ 2644 void 2645 ir_to_mesa_visitor::copy_propagate(void) 2646 { 2647 ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx, 2648 ir_to_mesa_instruction *, 2649 this->next_temp * 4); 2650 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 2651 int level = 0; 2652 2653 foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) { 2654 assert(inst->dst.file != PROGRAM_TEMPORARY 2655 || inst->dst.index < this->next_temp); 2656 2657 /* First, do any copy propagation possible into the src regs. */ 2658 for (int r = 0; r < 3; r++) { 2659 ir_to_mesa_instruction *first = NULL; 2660 bool good = true; 2661 int acp_base = inst->src[r].index * 4; 2662 2663 if (inst->src[r].file != PROGRAM_TEMPORARY || 2664 inst->src[r].reladdr) 2665 continue; 2666 2667 /* See if we can find entries in the ACP consisting of MOVs 2668 * from the same src register for all the swizzled channels 2669 * of this src register reference. 2670 */ 2671 for (int i = 0; i < 4; i++) { 2672 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 2673 ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan]; 2674 2675 if (!copy_chan) { 2676 good = false; 2677 break; 2678 } 2679 2680 assert(acp_level[acp_base + src_chan] <= level); 2681 2682 if (!first) { 2683 first = copy_chan; 2684 } else { 2685 if (first->src[0].file != copy_chan->src[0].file || 2686 first->src[0].index != copy_chan->src[0].index) { 2687 good = false; 2688 break; 2689 } 2690 } 2691 } 2692 2693 if (good) { 2694 /* We've now validated that we can copy-propagate to 2695 * replace this src register reference. Do it. 2696 */ 2697 inst->src[r].file = first->src[0].file; 2698 inst->src[r].index = first->src[0].index; 2699 2700 int swizzle = 0; 2701 for (int i = 0; i < 4; i++) { 2702 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 2703 ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan]; 2704 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 2705 (3 * i)); 2706 } 2707 inst->src[r].swizzle = swizzle; 2708 } 2709 } 2710 2711 switch (inst->op) { 2712 case OPCODE_BGNLOOP: 2713 case OPCODE_ENDLOOP: 2714 /* End of a basic block, clear the ACP entirely. */ 2715 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 2716 break; 2717 2718 case OPCODE_IF: 2719 ++level; 2720 break; 2721 2722 case OPCODE_ENDIF: 2723 case OPCODE_ELSE: 2724 /* Clear all channels written inside the block from the ACP, but 2725 * leaving those that were not touched. 2726 */ 2727 for (int r = 0; r < this->next_temp; r++) { 2728 for (int c = 0; c < 4; c++) { 2729 if (!acp[4 * r + c]) 2730 continue; 2731 2732 if (acp_level[4 * r + c] >= level) 2733 acp[4 * r + c] = NULL; 2734 } 2735 } 2736 if (inst->op == OPCODE_ENDIF) 2737 --level; 2738 break; 2739 2740 default: 2741 /* Continuing the block, clear any written channels from 2742 * the ACP. 2743 */ 2744 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 2745 /* Any temporary might be written, so no copy propagation 2746 * across this instruction. 2747 */ 2748 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 2749 } else if (inst->dst.file == PROGRAM_OUTPUT && 2750 inst->dst.reladdr) { 2751 /* Any output might be written, so no copy propagation 2752 * from outputs across this instruction. 2753 */ 2754 for (int r = 0; r < this->next_temp; r++) { 2755 for (int c = 0; c < 4; c++) { 2756 if (!acp[4 * r + c]) 2757 continue; 2758 2759 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 2760 acp[4 * r + c] = NULL; 2761 } 2762 } 2763 } else if (inst->dst.file == PROGRAM_TEMPORARY || 2764 inst->dst.file == PROGRAM_OUTPUT) { 2765 /* Clear where it's used as dst. */ 2766 if (inst->dst.file == PROGRAM_TEMPORARY) { 2767 for (int c = 0; c < 4; c++) { 2768 if (inst->dst.writemask & (1 << c)) { 2769 acp[4 * inst->dst.index + c] = NULL; 2770 } 2771 } 2772 } 2773 2774 /* Clear where it's used as src. */ 2775 for (int r = 0; r < this->next_temp; r++) { 2776 for (int c = 0; c < 4; c++) { 2777 if (!acp[4 * r + c]) 2778 continue; 2779 2780 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 2781 2782 if (acp[4 * r + c]->src[0].file == inst->dst.file && 2783 acp[4 * r + c]->src[0].index == inst->dst.index && 2784 inst->dst.writemask & (1 << src_chan)) 2785 { 2786 acp[4 * r + c] = NULL; 2787 } 2788 } 2789 } 2790 } 2791 break; 2792 } 2793 2794 /* If this is a copy, add it to the ACP. */ 2795 if (inst->op == OPCODE_MOV && 2796 inst->dst.file == PROGRAM_TEMPORARY && 2797 !(inst->dst.file == inst->src[0].file && 2798 inst->dst.index == inst->src[0].index) && 2799 !inst->dst.reladdr && 2800 !inst->saturate && 2801 !inst->src[0].reladdr && 2802 !inst->src[0].negate) { 2803 for (int i = 0; i < 4; i++) { 2804 if (inst->dst.writemask & (1 << i)) { 2805 acp[4 * inst->dst.index + i] = inst; 2806 acp_level[4 * inst->dst.index + i] = level; 2807 } 2808 } 2809 } 2810 } 2811 2812 ralloc_free(acp_level); 2813 ralloc_free(acp); 2814 } 2815 2816 2817 /** 2818 * Convert a shader's GLSL IR into a Mesa gl_program. 2819 */ 2820 static struct gl_program * 2821 get_mesa_program(struct gl_context *ctx, 2822 struct gl_shader_program *shader_program, 2823 struct gl_linked_shader *shader) 2824 { 2825 ir_to_mesa_visitor v; 2826 struct prog_instruction *mesa_instructions, *mesa_inst; 2827 ir_instruction **mesa_instruction_annotation; 2828 int i; 2829 struct gl_program *prog; 2830 GLenum target = _mesa_shader_stage_to_program(shader->Stage); 2831 const char *target_string = _mesa_shader_stage_to_string(shader->Stage); 2832 struct gl_shader_compiler_options *options = 2833 &ctx->Const.ShaderCompilerOptions[shader->Stage]; 2834 2835 validate_ir_tree(shader->ir); 2836 2837 prog = shader->Program; 2838 prog->Parameters = _mesa_new_parameter_list(); 2839 v.ctx = ctx; 2840 v.prog = prog; 2841 v.shader_program = shader_program; 2842 v.options = options; 2843 2844 _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader, 2845 prog->Parameters); 2846 2847 /* Emit Mesa IR for main(). */ 2848 visit_exec_list(shader->ir, &v); 2849 v.emit(NULL, OPCODE_END); 2850 2851 prog->arb.NumTemporaries = v.next_temp; 2852 2853 unsigned num_instructions = v.instructions.length(); 2854 2855 mesa_instructions = rzalloc_array(prog, struct prog_instruction, 2856 num_instructions); 2857 mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *, 2858 num_instructions); 2859 2860 v.copy_propagate(); 2861 2862 /* Convert ir_mesa_instructions into prog_instructions. 2863 */ 2864 mesa_inst = mesa_instructions; 2865 i = 0; 2866 foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) { 2867 mesa_inst->Opcode = inst->op; 2868 if (inst->saturate) 2869 mesa_inst->Saturate = GL_TRUE; 2870 mesa_inst->DstReg.File = inst->dst.file; 2871 mesa_inst->DstReg.Index = inst->dst.index; 2872 mesa_inst->DstReg.WriteMask = inst->dst.writemask; 2873 mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; 2874 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]); 2875 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]); 2876 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]); 2877 mesa_inst->TexSrcUnit = inst->sampler; 2878 mesa_inst->TexSrcTarget = inst->tex_target; 2879 mesa_inst->TexShadow = inst->tex_shadow; 2880 mesa_instruction_annotation[i] = inst->ir; 2881 2882 /* Set IndirectRegisterFiles. */ 2883 if (mesa_inst->DstReg.RelAddr) 2884 prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; 2885 2886 /* Update program's bitmask of indirectly accessed register files */ 2887 for (unsigned src = 0; src < 3; src++) 2888 if (mesa_inst->SrcReg[src].RelAddr) 2889 prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; 2890 2891 switch (mesa_inst->Opcode) { 2892 case OPCODE_IF: 2893 if (options->MaxIfDepth == 0) { 2894 linker_warning(shader_program, 2895 "Couldn't flatten if-statement. " 2896 "This will likely result in software " 2897 "rasterization.\n"); 2898 } 2899 break; 2900 case OPCODE_BGNLOOP: 2901 if (options->EmitNoLoops) { 2902 linker_warning(shader_program, 2903 "Couldn't unroll loop. " 2904 "This will likely result in software " 2905 "rasterization.\n"); 2906 } 2907 break; 2908 case OPCODE_CONT: 2909 if (options->EmitNoCont) { 2910 linker_warning(shader_program, 2911 "Couldn't lower continue-statement. " 2912 "This will likely result in software " 2913 "rasterization.\n"); 2914 } 2915 break; 2916 case OPCODE_ARL: 2917 prog->arb.NumAddressRegs = 1; 2918 break; 2919 default: 2920 break; 2921 } 2922 2923 mesa_inst++; 2924 i++; 2925 2926 if (!shader_program->data->LinkStatus) 2927 break; 2928 } 2929 2930 if (!shader_program->data->LinkStatus) { 2931 goto fail_exit; 2932 } 2933 2934 set_branchtargets(&v, mesa_instructions, num_instructions); 2935 2936 if (ctx->_Shader->Flags & GLSL_DUMP) { 2937 fprintf(stderr, "\n"); 2938 fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string, 2939 shader_program->Name); 2940 _mesa_print_ir(stderr, shader->ir, NULL); 2941 fprintf(stderr, "\n"); 2942 fprintf(stderr, "\n"); 2943 fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string, 2944 shader_program->Name); 2945 print_program(mesa_instructions, mesa_instruction_annotation, 2946 num_instructions); 2947 fflush(stderr); 2948 } 2949 2950 prog->arb.Instructions = mesa_instructions; 2951 prog->arb.NumInstructions = num_instructions; 2952 2953 /* Setting this to NULL prevents a possible double free in the fail_exit 2954 * path (far below). 2955 */ 2956 mesa_instructions = NULL; 2957 2958 do_set_program_inouts(shader->ir, prog, shader->Stage); 2959 2960 prog->ShadowSamplers = shader->shadow_samplers; 2961 prog->ExternalSamplersUsed = gl_external_samplers(prog); 2962 _mesa_update_shader_textures_used(shader_program, prog); 2963 2964 /* Set the gl_FragDepth layout. */ 2965 if (target == GL_FRAGMENT_PROGRAM_ARB) { 2966 prog->info.fs.depth_layout = shader_program->FragDepthLayout; 2967 } 2968 2969 _mesa_optimize_program(prog, prog); 2970 2971 /* This has to be done last. Any operation that can cause 2972 * prog->ParameterValues to get reallocated (e.g., anything that adds a 2973 * program constant) has to happen before creating this linkage. 2974 */ 2975 _mesa_associate_uniform_storage(ctx, shader_program, prog, true); 2976 if (!shader_program->data->LinkStatus) { 2977 goto fail_exit; 2978 } 2979 2980 return prog; 2981 2982 fail_exit: 2983 ralloc_free(mesa_instructions); 2984 _mesa_reference_program(ctx, &shader->Program, NULL); 2985 return NULL; 2986 } 2987 2988 extern "C" { 2989 2990 /** 2991 * Link a shader. 2992 * Called via ctx->Driver.LinkShader() 2993 * This actually involves converting GLSL IR into Mesa gl_programs with 2994 * code lowering and other optimizations. 2995 */ 2996 GLboolean 2997 _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 2998 { 2999 assert(prog->data->LinkStatus); 3000 3001 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 3002 if (prog->_LinkedShaders[i] == NULL) 3003 continue; 3004 3005 bool progress; 3006 exec_list *ir = prog->_LinkedShaders[i]->ir; 3007 const struct gl_shader_compiler_options *options = 3008 &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage]; 3009 3010 do { 3011 progress = false; 3012 3013 /* Lowering */ 3014 do_mat_op_to_vec(ir); 3015 lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2 3016 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP 3017 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 3018 3019 progress = do_common_optimization(ir, true, true, 3020 options, ctx->Const.NativeIntegers) 3021 || progress; 3022 3023 progress = lower_quadop_vector(ir, true) || progress; 3024 3025 if (options->MaxIfDepth == 0) 3026 progress = lower_discard(ir) || progress; 3027 3028 progress = lower_if_to_cond_assign((gl_shader_stage)i, ir, 3029 options->MaxIfDepth) || progress; 3030 3031 progress = lower_noise(ir) || progress; 3032 3033 /* If there are forms of indirect addressing that the driver 3034 * cannot handle, perform the lowering pass. 3035 */ 3036 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 3037 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 3038 progress = 3039 lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir, 3040 options->EmitNoIndirectInput, 3041 options->EmitNoIndirectOutput, 3042 options->EmitNoIndirectTemp, 3043 options->EmitNoIndirectUniform) 3044 || progress; 3045 3046 progress = do_vec_index_to_cond_assign(ir) || progress; 3047 progress = lower_vector_insert(ir, true) || progress; 3048 } while (progress); 3049 3050 validate_ir_tree(ir); 3051 } 3052 3053 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 3054 struct gl_program *linked_prog; 3055 3056 if (prog->_LinkedShaders[i] == NULL) 3057 continue; 3058 3059 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 3060 3061 if (linked_prog) { 3062 _mesa_copy_linked_program_data(prog, prog->_LinkedShaders[i]); 3063 3064 if (!ctx->Driver.ProgramStringNotify(ctx, 3065 _mesa_shader_stage_to_program(i), 3066 linked_prog)) { 3067 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 3068 NULL); 3069 return GL_FALSE; 3070 } 3071 } 3072 } 3073 3074 build_program_resource_list(ctx, prog); 3075 return prog->data->LinkStatus; 3076 } 3077 3078 /** 3079 * Link a GLSL shader program. Called via glLinkProgram(). 3080 */ 3081 void 3082 _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 3083 { 3084 unsigned int i; 3085 bool spirv; 3086 3087 _mesa_clear_shader_program_data(ctx, prog); 3088 3089 prog->data = _mesa_create_shader_program_data(); 3090 3091 prog->data->LinkStatus = linking_success; 3092 3093 for (i = 0; i < prog->NumShaders; i++) { 3094 if (!prog->Shaders[i]->CompileStatus) { 3095 linker_error(prog, "linking with uncompiled/unspecialized shader"); 3096 } 3097 3098 if (!i) { 3099 spirv = (prog->Shaders[i]->spirv_data != NULL); 3100 } else if (spirv && !prog->Shaders[i]->spirv_data) { 3101 /* The GL_ARB_gl_spirv spec adds a new bullet point to the list of 3102 * reasons LinkProgram can fail: 3103 * 3104 * "All the shader objects attached to <program> do not have the 3105 * same value for the SPIR_V_BINARY_ARB state." 3106 */ 3107 linker_error(prog, 3108 "not all attached shaders have the same " 3109 "SPIR_V_BINARY_ARB state"); 3110 } 3111 } 3112 3113 if (prog->data->LinkStatus) { 3114 link_shaders(ctx, prog); 3115 } 3116 3117 /* If LinkStatus is linking_success, then reset sampler validated to true. 3118 * Validation happens via the LinkShader call below. If LinkStatus is 3119 * linking_skipped, then SamplersValidated will have been restored from the 3120 * shader cache. 3121 */ 3122 if (prog->data->LinkStatus == linking_success) { 3123 prog->SamplersValidated = GL_TRUE; 3124 } 3125 3126 if (prog->data->LinkStatus && !ctx->Driver.LinkShader(ctx, prog)) { 3127 prog->data->LinkStatus = linking_failure; 3128 } 3129 3130 /* Return early if we are loading the shader from on-disk cache */ 3131 if (prog->data->LinkStatus == linking_skipped) 3132 return; 3133 3134 if (ctx->_Shader->Flags & GLSL_DUMP) { 3135 if (!prog->data->LinkStatus) { 3136 fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name); 3137 } 3138 3139 if (prog->data->InfoLog && prog->data->InfoLog[0] != 0) { 3140 fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name); 3141 fprintf(stderr, "%s\n", prog->data->InfoLog); 3142 } 3143 } 3144 3145 #ifdef ENABLE_SHADER_CACHE 3146 if (prog->data->LinkStatus) 3147 shader_cache_write_program_metadata(ctx, prog); 3148 #endif 3149 } 3150 3151 } /* extern "C" */ 3152