1 /* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright 2010 Intel Corporation 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 * DEALINGS IN THE SOFTWARE. 24 */ 25 26 /** 27 * \file ir_to_mesa.cpp 28 * 29 * Translate GLSL IR to Mesa's gl_program representation. 30 */ 31 32 #include <stdio.h> 33 #include "main/compiler.h" 34 #include "ir.h" 35 #include "ir_visitor.h" 36 #include "ir_print_visitor.h" 37 #include "ir_expression_flattening.h" 38 #include "ir_uniform.h" 39 #include "glsl_types.h" 40 #include "glsl_parser_extras.h" 41 #include "../glsl/program.h" 42 #include "ir_optimization.h" 43 #include "ast.h" 44 #include "linker.h" 45 46 #include "main/mtypes.h" 47 #include "main/shaderobj.h" 48 #include "program/hash_table.h" 49 50 extern "C" { 51 #include "main/shaderapi.h" 52 #include "main/uniforms.h" 53 #include "program/prog_instruction.h" 54 #include "program/prog_optimize.h" 55 #include "program/prog_print.h" 56 #include "program/program.h" 57 #include "program/prog_parameter.h" 58 #include "program/sampler.h" 59 } 60 61 class src_reg; 62 class dst_reg; 63 64 static int swizzle_for_size(int size); 65 66 /** 67 * This struct is a corresponding struct to Mesa prog_src_register, with 68 * wider fields. 69 */ 70 class src_reg { 71 public: 72 src_reg(gl_register_file file, int index, const glsl_type *type) 73 { 74 this->file = file; 75 this->index = index; 76 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 77 this->swizzle = swizzle_for_size(type->vector_elements); 78 else 79 this->swizzle = SWIZZLE_XYZW; 80 this->negate = 0; 81 this->reladdr = NULL; 82 } 83 84 src_reg() 85 { 86 this->file = PROGRAM_UNDEFINED; 87 this->index = 0; 88 this->swizzle = 0; 89 this->negate = 0; 90 this->reladdr = NULL; 91 } 92 93 explicit src_reg(dst_reg reg); 94 95 gl_register_file file; /**< PROGRAM_* from Mesa */ 96 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 97 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 98 int negate; /**< NEGATE_XYZW mask from mesa */ 99 /** Register index should be offset by the integer in this reg. 
*/ 100 src_reg *reladdr; 101 }; 102 103 class dst_reg { 104 public: 105 dst_reg(gl_register_file file, int writemask) 106 { 107 this->file = file; 108 this->index = 0; 109 this->writemask = writemask; 110 this->cond_mask = COND_TR; 111 this->reladdr = NULL; 112 } 113 114 dst_reg() 115 { 116 this->file = PROGRAM_UNDEFINED; 117 this->index = 0; 118 this->writemask = 0; 119 this->cond_mask = COND_TR; 120 this->reladdr = NULL; 121 } 122 123 explicit dst_reg(src_reg reg); 124 125 gl_register_file file; /**< PROGRAM_* from Mesa */ 126 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */ 127 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 128 GLuint cond_mask:4; 129 /** Register index should be offset by the integer in this reg. */ 130 src_reg *reladdr; 131 }; 132 133 src_reg::src_reg(dst_reg reg) 134 { 135 this->file = reg.file; 136 this->index = reg.index; 137 this->swizzle = SWIZZLE_XYZW; 138 this->negate = 0; 139 this->reladdr = reg.reladdr; 140 } 141 142 dst_reg::dst_reg(src_reg reg) 143 { 144 this->file = reg.file; 145 this->index = reg.index; 146 this->writemask = WRITEMASK_XYZW; 147 this->cond_mask = COND_TR; 148 this->reladdr = reg.reladdr; 149 } 150 151 class ir_to_mesa_instruction : public exec_node { 152 public: 153 /* Callers of this ralloc-based new need not call delete. It's 154 * easier to just ralloc_free 'ctx' (or any of its ancestors). */ 155 static void* operator new(size_t size, void *ctx) 156 { 157 void *node; 158 159 node = rzalloc_size(ctx, size); 160 assert(node != NULL); 161 162 return node; 163 } 164 165 enum prog_opcode op; 166 dst_reg dst; 167 src_reg src[3]; 168 /** Pointer to the ir source this tree came from for debugging */ 169 ir_instruction *ir; 170 GLboolean cond_update; 171 bool saturate; 172 int sampler; /**< sampler index */ 173 int tex_target; /**< One of TEXTURE_*_INDEX */ 174 GLboolean tex_shadow; 175 }; 176 177 class variable_storage : public exec_node { 178 public: 179 variable_storage(ir_variable *var, gl_register_file file, int index) 180 : file(file), index(index), var(var) 181 { 182 /* empty */ 183 } 184 185 gl_register_file file; 186 int index; 187 ir_variable *var; /* variable that maps to this, if any */ 188 }; 189 190 class function_entry : public exec_node { 191 public: 192 ir_function_signature *sig; 193 194 /** 195 * identifier of this function signature used by the program. 196 * 197 * At the point that Mesa instructions for function calls are 198 * generated, we don't know the address of the first instruction of 199 * the function body. So we make the BranchTarget that is called a 200 * small integer and rewrite them during set_branchtargets(). 201 */ 202 int sig_id; 203 204 /** 205 * Pointer to first instruction of the function body. 206 * 207 * Set during function body emits after main() is processed. 208 */ 209 ir_to_mesa_instruction *bgn_inst; 210 211 /** 212 * Index of the first instruction of the function body in actual 213 * Mesa IR. 214 * 215 * Set after convertion from ir_to_mesa_instruction to prog_instruction. 216 */ 217 int inst; 218 219 /** Storage for the return value. 
*/ 220 src_reg return_reg; 221 }; 222 223 class ir_to_mesa_visitor : public ir_visitor { 224 public: 225 ir_to_mesa_visitor(); 226 ~ir_to_mesa_visitor(); 227 228 function_entry *current_function; 229 230 struct gl_context *ctx; 231 struct gl_program *prog; 232 struct gl_shader_program *shader_program; 233 struct gl_shader_compiler_options *options; 234 235 int next_temp; 236 237 variable_storage *find_variable_storage(ir_variable *var); 238 239 src_reg get_temp(const glsl_type *type); 240 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); 241 242 src_reg src_reg_for_float(float val); 243 244 /** 245 * \name Visit methods 246 * 247 * As typical for the visitor pattern, there must be one \c visit method for 248 * each concrete subclass of \c ir_instruction. Virtual base classes within 249 * the hierarchy should not have \c visit methods. 250 */ 251 /*@{*/ 252 virtual void visit(ir_variable *); 253 virtual void visit(ir_loop *); 254 virtual void visit(ir_loop_jump *); 255 virtual void visit(ir_function_signature *); 256 virtual void visit(ir_function *); 257 virtual void visit(ir_expression *); 258 virtual void visit(ir_swizzle *); 259 virtual void visit(ir_dereference_variable *); 260 virtual void visit(ir_dereference_array *); 261 virtual void visit(ir_dereference_record *); 262 virtual void visit(ir_assignment *); 263 virtual void visit(ir_constant *); 264 virtual void visit(ir_call *); 265 virtual void visit(ir_return *); 266 virtual void visit(ir_discard *); 267 virtual void visit(ir_texture *); 268 virtual void visit(ir_if *); 269 /*@}*/ 270 271 src_reg result; 272 273 /** List of variable_storage */ 274 exec_list variables; 275 276 /** List of function_entry */ 277 exec_list function_signatures; 278 int next_signature_id; 279 280 /** List of ir_to_mesa_instruction */ 281 exec_list instructions; 282 283 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op); 284 285 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 286 dst_reg dst, src_reg src0); 287 288 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 289 dst_reg dst, src_reg src0, src_reg src1); 290 291 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 292 dst_reg dst, 293 src_reg src0, src_reg src1, src_reg src2); 294 295 /** 296 * Emit the correct dot-product instruction for the type of arguments 297 */ 298 ir_to_mesa_instruction * emit_dp(ir_instruction *ir, 299 dst_reg dst, 300 src_reg src0, 301 src_reg src1, 302 unsigned elements); 303 304 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 305 dst_reg dst, src_reg src0); 306 307 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 308 dst_reg dst, src_reg src0, src_reg src1); 309 310 void emit_scs(ir_instruction *ir, enum prog_opcode op, 311 dst_reg dst, const src_reg &src); 312 313 bool try_emit_mad(ir_expression *ir, 314 int mul_operand); 315 bool try_emit_mad_for_and_not(ir_expression *ir, 316 int mul_operand); 317 bool try_emit_sat(ir_expression *ir); 318 319 void emit_swz(ir_expression *ir); 320 321 bool process_move_condition(ir_rvalue *ir); 322 323 void copy_propagate(void); 324 325 void *mem_ctx; 326 }; 327 328 src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL); 329 330 dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); 331 332 dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); 333 334 static int 335 swizzle_for_size(int size) 336 { 337 static const int size_swizzles[4] = { 338 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 
339 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 340 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 341 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 342 }; 343 344 assert((size >= 1) && (size <= 4)); 345 return size_swizzles[size - 1]; 346 } 347 348 ir_to_mesa_instruction * 349 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 350 dst_reg dst, 351 src_reg src0, src_reg src1, src_reg src2) 352 { 353 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction(); 354 int num_reladdr = 0; 355 356 /* If we have to do relative addressing, we want to load the ARL 357 * reg directly for one of the regs, and preload the other reladdr 358 * sources into temps. 359 */ 360 num_reladdr += dst.reladdr != NULL; 361 num_reladdr += src0.reladdr != NULL; 362 num_reladdr += src1.reladdr != NULL; 363 num_reladdr += src2.reladdr != NULL; 364 365 reladdr_to_temp(ir, &src2, &num_reladdr); 366 reladdr_to_temp(ir, &src1, &num_reladdr); 367 reladdr_to_temp(ir, &src0, &num_reladdr); 368 369 if (dst.reladdr) { 370 emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); 371 num_reladdr--; 372 } 373 assert(num_reladdr == 0); 374 375 inst->op = op; 376 inst->dst = dst; 377 inst->src[0] = src0; 378 inst->src[1] = src1; 379 inst->src[2] = src2; 380 inst->ir = ir; 381 382 this->instructions.push_tail(inst); 383 384 return inst; 385 } 386 387 388 ir_to_mesa_instruction * 389 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 390 dst_reg dst, src_reg src0, src_reg src1) 391 { 392 return emit(ir, op, dst, src0, src1, undef_src); 393 } 394 395 ir_to_mesa_instruction * 396 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 397 dst_reg dst, src_reg src0) 398 { 399 assert(dst.writemask != 0); 400 return emit(ir, op, dst, src0, undef_src, undef_src); 401 } 402 403 ir_to_mesa_instruction * 404 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) 405 { 406 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 407 } 408 409 ir_to_mesa_instruction * 410 ir_to_mesa_visitor::emit_dp(ir_instruction *ir, 411 dst_reg dst, src_reg src0, src_reg src1, 412 unsigned elements) 413 { 414 static const gl_inst_opcode dot_opcodes[] = { 415 OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 416 }; 417 418 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 419 } 420 421 /** 422 * Emits Mesa scalar opcodes to produce unique answers across channels. 423 * 424 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X 425 * channel determines the result across all channels. So to do a vec4 426 * of this operation, we want to emit a scalar per source channel used 427 * to produce dest channels. 428 */ 429 void 430 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 431 dst_reg dst, 432 src_reg orig_src0, src_reg orig_src1) 433 { 434 int i, j; 435 int done_mask = ~dst.writemask; 436 437 /* Mesa RCP is a scalar operation splatting results to all channels, 438 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 439 * dst channels. 
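    * For example (illustrative): if only dst.x and dst.y are enabled and they
    * read src.x and src.y respectively, this loop emits
    *    RCP dst.x, src.xxxx;
    *    RCP dst.y, src.yyyy;
    * while destination channels that read the same source component are
    * folded into a single instruction via the writemask.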
    */
   for (i = 0; i < 4; i++) {
      GLuint this_mask = (1 << i);
      ir_to_mesa_instruction *inst;
      src_reg src0 = orig_src0;
      src_reg src1 = orig_src1;

      if (done_mask & this_mask)
         continue;

      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
      for (j = i + 1; j < 4; j++) {
         /* If there is another enabled component in the destination that is
          * derived from the same inputs, generate its value on this pass as
          * well.
          */
         if (!(done_mask & (1 << j)) &&
             GET_SWZ(src0.swizzle, j) == src0_swiz &&
             GET_SWZ(src1.swizzle, j) == src1_swiz) {
            this_mask |= (1 << j);
         }
      }
      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
                                   src0_swiz, src0_swiz);
      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
                                   src1_swiz, src1_swiz);

      inst = emit(ir, op, dst, src0, src1);
      inst->dst.writemask = this_mask;
      done_mask |= this_mask;
   }
}

void
ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
                                dst_reg dst, src_reg src0)
{
   src_reg undef = undef_src;

   undef.swizzle = SWIZZLE_XXXX;

   emit_scalar(ir, op, dst, src0, undef);
}

/**
 * Emit an OPCODE_SCS instruction
 *
 * The \c SCS opcode functions a bit differently than the other Mesa (or
 * ARB_fragment_program) opcodes.  Instead of splatting its result across all
 * four components of the destination, it writes one value to the \c x
 * component and another value to the \c y component.
 *
 * \param ir   IR instruction being processed
 * \param op   Either \c OPCODE_SIN or \c OPCODE_COS depending on which
 *             value is desired.
 * \param dst  Destination register
 * \param src  Source register
 */
void
ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
                             dst_reg dst,
                             const src_reg &src)
{
   /* Vertex programs cannot use the SCS opcode.
    */
   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
      emit_scalar(ir, op, dst, src);
      return;
   }

   const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
   const unsigned scs_mask = (1U << component);
   int done_mask = ~dst.writemask;
   src_reg tmp;

   assert(op == OPCODE_SIN || op == OPCODE_COS);

   /* If there are components in the destination that differ from the
    * component that will be written by the SCS instruction, we'll need a
    * temporary.
    */
   if (scs_mask != unsigned(dst.writemask)) {
      tmp = get_temp(glsl_type::vec4_type);
   }

   for (unsigned i = 0; i < 4; i++) {
      unsigned this_mask = (1U << i);
      src_reg src0 = src;

      if ((done_mask & this_mask) != 0)
         continue;

      /* The source swizzle specifies which component of the source generates
       * sine / cosine for the current component in the destination.  The SCS
       * instruction requires that this value be swizzled to the X component.
       * Replace the current swizzle with a swizzle that puts the source in
       * the X component.
       */
      unsigned src0_swiz = GET_SWZ(src.swizzle, i);

      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
                                   src0_swiz, src0_swiz);
      for (unsigned j = i + 1; j < 4; j++) {
         /* If there is another enabled component in the destination that is
          * derived from the same inputs, generate its value on this pass as
          * well.
546 */ 547 if (!(done_mask & (1 << j)) && 548 GET_SWZ(src0.swizzle, j) == src0_swiz) { 549 this_mask |= (1 << j); 550 } 551 } 552 553 if (this_mask != scs_mask) { 554 ir_to_mesa_instruction *inst; 555 dst_reg tmp_dst = dst_reg(tmp); 556 557 /* Emit the SCS instruction. 558 */ 559 inst = emit(ir, OPCODE_SCS, tmp_dst, src0); 560 inst->dst.writemask = scs_mask; 561 562 /* Move the result of the SCS instruction to the desired location in 563 * the destination. 564 */ 565 tmp.swizzle = MAKE_SWIZZLE4(component, component, 566 component, component); 567 inst = emit(ir, OPCODE_SCS, dst, tmp); 568 inst->dst.writemask = this_mask; 569 } else { 570 /* Emit the SCS instruction to write directly to the destination. 571 */ 572 ir_to_mesa_instruction *inst = emit(ir, OPCODE_SCS, dst, src0); 573 inst->dst.writemask = scs_mask; 574 } 575 576 done_mask |= this_mask; 577 } 578 } 579 580 src_reg 581 ir_to_mesa_visitor::src_reg_for_float(float val) 582 { 583 src_reg src(PROGRAM_CONSTANT, -1, NULL); 584 585 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 586 (const gl_constant_value *)&val, 1, &src.swizzle); 587 588 return src; 589 } 590 591 static int 592 type_size(const struct glsl_type *type) 593 { 594 unsigned int i; 595 int size; 596 597 switch (type->base_type) { 598 case GLSL_TYPE_UINT: 599 case GLSL_TYPE_INT: 600 case GLSL_TYPE_FLOAT: 601 case GLSL_TYPE_BOOL: 602 if (type->is_matrix()) { 603 return type->matrix_columns; 604 } else { 605 /* Regardless of size of vector, it gets a vec4. This is bad 606 * packing for things like floats, but otherwise arrays become a 607 * mess. Hopefully a later pass over the code can pack scalars 608 * down if appropriate. 609 */ 610 return 1; 611 } 612 case GLSL_TYPE_ARRAY: 613 assert(type->length > 0); 614 return type_size(type->fields.array) * type->length; 615 case GLSL_TYPE_STRUCT: 616 size = 0; 617 for (i = 0; i < type->length; i++) { 618 size += type_size(type->fields.structure[i].type); 619 } 620 return size; 621 case GLSL_TYPE_SAMPLER: 622 /* Samplers take up one slot in UNIFORMS[], but they're baked in 623 * at link time. 624 */ 625 return 1; 626 default: 627 assert(0); 628 return 0; 629 } 630 } 631 632 /** 633 * In the initial pass of codegen, we assign temporary numbers to 634 * intermediate results. (not SSA -- variable assignments will reuse 635 * storage). Actual register allocation for the Mesa VM occurs in a 636 * pass over the Mesa IR later. 
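 *
 * Sizing follows type_size() above (illustrative): scalars and vectors take
 * one vec4 slot, a matrix takes one slot per column, and an array takes its
 * element size times its length.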
637 */ 638 src_reg 639 ir_to_mesa_visitor::get_temp(const glsl_type *type) 640 { 641 src_reg src; 642 643 src.file = PROGRAM_TEMPORARY; 644 src.index = next_temp; 645 src.reladdr = NULL; 646 next_temp += type_size(type); 647 648 if (type->is_array() || type->is_record()) { 649 src.swizzle = SWIZZLE_NOOP; 650 } else { 651 src.swizzle = swizzle_for_size(type->vector_elements); 652 } 653 src.negate = 0; 654 655 return src; 656 } 657 658 variable_storage * 659 ir_to_mesa_visitor::find_variable_storage(ir_variable *var) 660 { 661 662 variable_storage *entry; 663 664 foreach_iter(exec_list_iterator, iter, this->variables) { 665 entry = (variable_storage *)iter.get(); 666 667 if (entry->var == var) 668 return entry; 669 } 670 671 return NULL; 672 } 673 674 void 675 ir_to_mesa_visitor::visit(ir_variable *ir) 676 { 677 if (strcmp(ir->name, "gl_FragCoord") == 0) { 678 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog; 679 680 fp->OriginUpperLeft = ir->origin_upper_left; 681 fp->PixelCenterInteger = ir->pixel_center_integer; 682 } 683 684 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 685 unsigned int i; 686 const ir_state_slot *const slots = ir->state_slots; 687 assert(ir->state_slots != NULL); 688 689 /* Check if this statevar's setup in the STATE file exactly 690 * matches how we'll want to reference it as a 691 * struct/array/whatever. If not, then we need to move it into 692 * temporary storage and hope that it'll get copy-propagated 693 * out. 694 */ 695 for (i = 0; i < ir->num_state_slots; i++) { 696 if (slots[i].swizzle != SWIZZLE_XYZW) { 697 break; 698 } 699 } 700 701 variable_storage *storage; 702 dst_reg dst; 703 if (i == ir->num_state_slots) { 704 /* We'll set the index later. */ 705 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 706 this->variables.push_tail(storage); 707 708 dst = undef_dst; 709 } else { 710 /* The variable_storage constructor allocates slots based on the size 711 * of the type. However, this had better match the number of state 712 * elements that we're going to copy into the new temporary. 713 */ 714 assert((int) ir->num_state_slots == type_size(ir->type)); 715 716 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 717 this->next_temp); 718 this->variables.push_tail(storage); 719 this->next_temp += type_size(ir->type); 720 721 dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); 722 } 723 724 725 for (unsigned int i = 0; i < ir->num_state_slots; i++) { 726 int index = _mesa_add_state_reference(this->prog->Parameters, 727 (gl_state_index *)slots[i].tokens); 728 729 if (storage->file == PROGRAM_STATE_VAR) { 730 if (storage->index == -1) { 731 storage->index = index; 732 } else { 733 assert(index == storage->index + (int)i); 734 } 735 } else { 736 src_reg src(PROGRAM_STATE_VAR, index, NULL); 737 src.swizzle = slots[i].swizzle; 738 emit(ir, OPCODE_MOV, dst, src); 739 /* even a float takes up a whole vec4 reg in a struct/array. 
*/ 740 dst.index++; 741 } 742 } 743 744 if (storage->file == PROGRAM_TEMPORARY && 745 dst.index != storage->index + (int) ir->num_state_slots) { 746 linker_error(this->shader_program, 747 "failed to load builtin uniform `%s' " 748 "(%d/%d regs loaded)\n", 749 ir->name, dst.index - storage->index, 750 type_size(ir->type)); 751 } 752 } 753 } 754 755 void 756 ir_to_mesa_visitor::visit(ir_loop *ir) 757 { 758 ir_dereference_variable *counter = NULL; 759 760 if (ir->counter != NULL) 761 counter = new(mem_ctx) ir_dereference_variable(ir->counter); 762 763 if (ir->from != NULL) { 764 assert(ir->counter != NULL); 765 766 ir_assignment *a = 767 new(mem_ctx) ir_assignment(counter, ir->from, NULL); 768 769 a->accept(this); 770 } 771 772 emit(NULL, OPCODE_BGNLOOP); 773 774 if (ir->to) { 775 ir_expression *e = 776 new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type, 777 counter, ir->to); 778 ir_if *if_stmt = new(mem_ctx) ir_if(e); 779 780 ir_loop_jump *brk = 781 new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); 782 783 if_stmt->then_instructions.push_tail(brk); 784 785 if_stmt->accept(this); 786 } 787 788 visit_exec_list(&ir->body_instructions, this); 789 790 if (ir->increment) { 791 ir_expression *e = 792 new(mem_ctx) ir_expression(ir_binop_add, counter->type, 793 counter, ir->increment); 794 795 ir_assignment *a = 796 new(mem_ctx) ir_assignment(counter, e, NULL); 797 798 a->accept(this); 799 } 800 801 emit(NULL, OPCODE_ENDLOOP); 802 } 803 804 void 805 ir_to_mesa_visitor::visit(ir_loop_jump *ir) 806 { 807 switch (ir->mode) { 808 case ir_loop_jump::jump_break: 809 emit(NULL, OPCODE_BRK); 810 break; 811 case ir_loop_jump::jump_continue: 812 emit(NULL, OPCODE_CONT); 813 break; 814 } 815 } 816 817 818 void 819 ir_to_mesa_visitor::visit(ir_function_signature *ir) 820 { 821 assert(0); 822 (void)ir; 823 } 824 825 void 826 ir_to_mesa_visitor::visit(ir_function *ir) 827 { 828 /* Ignore function bodies other than main() -- we shouldn't see calls to 829 * them since they should all be inlined before we get to ir_to_mesa. 830 */ 831 if (strcmp(ir->name, "main") == 0) { 832 const ir_function_signature *sig; 833 exec_list empty; 834 835 sig = ir->matching_signature(&empty); 836 837 assert(sig); 838 839 foreach_iter(exec_list_iterator, iter, sig->body) { 840 ir_instruction *ir = (ir_instruction *)iter.get(); 841 842 ir->accept(this); 843 } 844 } 845 } 846 847 bool 848 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 849 { 850 int nonmul_operand = 1 - mul_operand; 851 src_reg a, b, c; 852 853 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 854 if (!expr || expr->operation != ir_binop_mul) 855 return false; 856 857 expr->operands[0]->accept(this); 858 a = this->result; 859 expr->operands[1]->accept(this); 860 b = this->result; 861 ir->operands[nonmul_operand]->accept(this); 862 c = this->result; 863 864 this->result = get_temp(ir->type); 865 emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c); 866 867 return true; 868 } 869 870 /** 871 * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) 872 * 873 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 874 * implemented using multiplication, and logical-or is implemented using 875 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 
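 * (Quick check, illustrative: with a = 1.0 and b = 0.0, a * b = 0.0 gives
 * a && b, a + b = 1.0 gives a || b, and 1.0 - b = 1.0 gives !b.)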
876 * As result, the logical expression (a & !b) can be rewritten as: 877 * 878 * - a * !b 879 * - a * (1 - b) 880 * - (a * 1) - (a * b) 881 * - a + -(a * b) 882 * - a + (a * -b) 883 * 884 * This final expression can be implemented as a single MAD(a, -b, a) 885 * instruction. 886 */ 887 bool 888 ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 889 { 890 const int other_operand = 1 - try_operand; 891 src_reg a, b; 892 893 ir_expression *expr = ir->operands[try_operand]->as_expression(); 894 if (!expr || expr->operation != ir_unop_logic_not) 895 return false; 896 897 ir->operands[other_operand]->accept(this); 898 a = this->result; 899 expr->operands[0]->accept(this); 900 b = this->result; 901 902 b.negate = ~b.negate; 903 904 this->result = get_temp(ir->type); 905 emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); 906 907 return true; 908 } 909 910 bool 911 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir) 912 { 913 /* Saturates were only introduced to vertex programs in 914 * NV_vertex_program3, so don't give them to drivers in the VP. 915 */ 916 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) 917 return false; 918 919 ir_rvalue *sat_src = ir->as_rvalue_to_saturate(); 920 if (!sat_src) 921 return false; 922 923 sat_src->accept(this); 924 src_reg src = this->result; 925 926 /* If we generated an expression instruction into a temporary in 927 * processing the saturate's operand, apply the saturate to that 928 * instruction. Otherwise, generate a MOV to do the saturate. 929 * 930 * Note that we have to be careful to only do this optimization if 931 * the instruction in question was what generated src->result. For 932 * example, ir_dereference_array might generate a MUL instruction 933 * to create the reladdr, and return us a src reg using that 934 * reladdr. That MUL result is not the value we're trying to 935 * saturate. 936 */ 937 ir_expression *sat_src_expr = sat_src->as_expression(); 938 ir_to_mesa_instruction *new_inst; 939 new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); 940 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul || 941 sat_src_expr->operation == ir_binop_add || 942 sat_src_expr->operation == ir_binop_dot)) { 943 new_inst->saturate = true; 944 } else { 945 this->result = get_temp(ir->type); 946 ir_to_mesa_instruction *inst; 947 inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src); 948 inst->saturate = true; 949 } 950 951 return true; 952 } 953 954 void 955 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, 956 src_reg *reg, int *num_reladdr) 957 { 958 if (!reg->reladdr) 959 return; 960 961 emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); 962 963 if (*num_reladdr != 1) { 964 src_reg temp = get_temp(glsl_type::vec4_type); 965 966 emit(ir, OPCODE_MOV, dst_reg(temp), *reg); 967 *reg = temp; 968 } 969 970 (*num_reladdr)--; 971 } 972 973 void 974 ir_to_mesa_visitor::emit_swz(ir_expression *ir) 975 { 976 /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. 977 * This means that each of the operands is either an immediate value of -1, 978 * 0, or 1, or is a component from one source register (possibly with 979 * negation). 
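    * For example (illustrative), vec4(v.y, -v.x, 0.0, 1.0) satisfies this
    * form and becomes a single SWZ of v with swizzle <y, -x, 0, 1>.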
980 */ 981 uint8_t components[4] = { 0 }; 982 bool negate[4] = { false }; 983 ir_variable *var = NULL; 984 985 for (unsigned i = 0; i < ir->type->vector_elements; i++) { 986 ir_rvalue *op = ir->operands[i]; 987 988 assert(op->type->is_scalar()); 989 990 while (op != NULL) { 991 switch (op->ir_type) { 992 case ir_type_constant: { 993 994 assert(op->type->is_scalar()); 995 996 const ir_constant *const c = op->as_constant(); 997 if (c->is_one()) { 998 components[i] = SWIZZLE_ONE; 999 } else if (c->is_zero()) { 1000 components[i] = SWIZZLE_ZERO; 1001 } else if (c->is_negative_one()) { 1002 components[i] = SWIZZLE_ONE; 1003 negate[i] = true; 1004 } else { 1005 assert(!"SWZ constant must be 0.0 or 1.0."); 1006 } 1007 1008 op = NULL; 1009 break; 1010 } 1011 1012 case ir_type_dereference_variable: { 1013 ir_dereference_variable *const deref = 1014 (ir_dereference_variable *) op; 1015 1016 assert((var == NULL) || (deref->var == var)); 1017 components[i] = SWIZZLE_X; 1018 var = deref->var; 1019 op = NULL; 1020 break; 1021 } 1022 1023 case ir_type_expression: { 1024 ir_expression *const expr = (ir_expression *) op; 1025 1026 assert(expr->operation == ir_unop_neg); 1027 negate[i] = true; 1028 1029 op = expr->operands[0]; 1030 break; 1031 } 1032 1033 case ir_type_swizzle: { 1034 ir_swizzle *const swiz = (ir_swizzle *) op; 1035 1036 components[i] = swiz->mask.x; 1037 op = swiz->val; 1038 break; 1039 } 1040 1041 default: 1042 assert(!"Should not get here."); 1043 return; 1044 } 1045 } 1046 } 1047 1048 assert(var != NULL); 1049 1050 ir_dereference_variable *const deref = 1051 new(mem_ctx) ir_dereference_variable(var); 1052 1053 this->result.file = PROGRAM_UNDEFINED; 1054 deref->accept(this); 1055 if (this->result.file == PROGRAM_UNDEFINED) { 1056 ir_print_visitor v; 1057 printf("Failed to get tree for expression operand:\n"); 1058 deref->accept(&v); 1059 exit(1); 1060 } 1061 1062 src_reg src; 1063 1064 src = this->result; 1065 src.swizzle = MAKE_SWIZZLE4(components[0], 1066 components[1], 1067 components[2], 1068 components[3]); 1069 src.negate = ((unsigned(negate[0]) << 0) 1070 | (unsigned(negate[1]) << 1) 1071 | (unsigned(negate[2]) << 2) 1072 | (unsigned(negate[3]) << 3)); 1073 1074 /* Storage for our result. Ideally for an assignment we'd be using the 1075 * actual storage for the result here, instead. 1076 */ 1077 const src_reg result_src = get_temp(ir->type); 1078 dst_reg result_dst = dst_reg(result_src); 1079 1080 /* Limit writes to the channels that will be used by result_src later. 1081 * This does limit this temp's use as a temporary for multi-instruction 1082 * sequences. 
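    * (For a vec2 result, for instance, the mask computed below is
    * WRITEMASK_XY.)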
1083 */ 1084 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1085 1086 emit(ir, OPCODE_SWZ, result_dst, src); 1087 this->result = result_src; 1088 } 1089 1090 void 1091 ir_to_mesa_visitor::visit(ir_expression *ir) 1092 { 1093 unsigned int operand; 1094 src_reg op[Elements(ir->operands)]; 1095 src_reg result_src; 1096 dst_reg result_dst; 1097 1098 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) 1099 */ 1100 if (ir->operation == ir_binop_add) { 1101 if (try_emit_mad(ir, 1)) 1102 return; 1103 if (try_emit_mad(ir, 0)) 1104 return; 1105 } 1106 1107 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1108 */ 1109 if (ir->operation == ir_binop_logic_and) { 1110 if (try_emit_mad_for_and_not(ir, 1)) 1111 return; 1112 if (try_emit_mad_for_and_not(ir, 0)) 1113 return; 1114 } 1115 1116 if (try_emit_sat(ir)) 1117 return; 1118 1119 if (ir->operation == ir_quadop_vector) { 1120 this->emit_swz(ir); 1121 return; 1122 } 1123 1124 for (operand = 0; operand < ir->get_num_operands(); operand++) { 1125 this->result.file = PROGRAM_UNDEFINED; 1126 ir->operands[operand]->accept(this); 1127 if (this->result.file == PROGRAM_UNDEFINED) { 1128 ir_print_visitor v; 1129 printf("Failed to get tree for expression operand:\n"); 1130 ir->operands[operand]->accept(&v); 1131 exit(1); 1132 } 1133 op[operand] = this->result; 1134 1135 /* Matrix expression operands should have been broken down to vector 1136 * operations already. 1137 */ 1138 assert(!ir->operands[operand]->type->is_matrix()); 1139 } 1140 1141 int vector_elements = ir->operands[0]->type->vector_elements; 1142 if (ir->operands[1]) { 1143 vector_elements = MAX2(vector_elements, 1144 ir->operands[1]->type->vector_elements); 1145 } 1146 1147 this->result.file = PROGRAM_UNDEFINED; 1148 1149 /* Storage for our result. Ideally for an assignment we'd be using 1150 * the actual storage for the result here, instead. 1151 */ 1152 result_src = get_temp(ir->type); 1153 /* convenience for the emit functions below. */ 1154 result_dst = dst_reg(result_src); 1155 /* Limit writes to the channels that will be used by result_src later. 1156 * This does limit this temp's use as a temporary for multi-instruction 1157 * sequences. 1158 */ 1159 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1160 1161 switch (ir->operation) { 1162 case ir_unop_logic_not: 1163 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1164 * older GPUs implement SEQ using multiple instructions (i915 uses two 1165 * SGE instructions and a MUL instruction). Since our logic values are 1166 * 0.0 and 1.0, 1-x also implements !x. 
       */
      op[0].negate = ~op[0].negate;
      emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
      break;
   case ir_unop_neg:
      op[0].negate = ~op[0].negate;
      result_src = op[0];
      break;
   case ir_unop_abs:
      emit(ir, OPCODE_ABS, result_dst, op[0]);
      break;
   case ir_unop_sign:
      emit(ir, OPCODE_SSG, result_dst, op[0]);
      break;
   case ir_unop_rcp:
      emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_log2:
      emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
      break;
   case ir_unop_sin:
      emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
      emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
      break;
   case ir_unop_sin_reduced:
      emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos_reduced:
      emit_scs(ir, OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
      emit(ir, OPCODE_DDX, result_dst, op[0]);
      break;
   case ir_unop_dFdy:
      emit(ir, OPCODE_DDY, result_dst, op[0]);
      break;

   case ir_unop_noise: {
      const enum prog_opcode opcode =
         prog_opcode(OPCODE_NOISE1
                     + (ir->operands[0]->type->vector_elements) - 1);
      assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));

      emit(ir, opcode, result_dst, op[0]);
      break;
   }

   case ir_binop_add:
      emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
      break;

   case ir_binop_mul:
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
      assert(ir->type->is_integer());
      emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_less:
      emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
      break;
   case ir_binop_greater:
      emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
      break;
   case ir_binop_lequal:
      emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
      break;
   case ir_binop_gequal:
      emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
      break;
   case ir_binop_equal:
      emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      break;
   case ir_binop_nequal:
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;
   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         src_reg temp = get_temp(glsl_type::vec4_type);
         emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);

         /* After the dot-product, the value will be an integer on the
          * range [0,4].  Zero becomes 1.0, and positive values become zero.
          */
         emit_dp(ir, result_dst, temp, temp, vector_elements);

         /* Negating the result of the dot-product gives values on the range
          * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
          * is achieved using SGE.
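          * (Illustrative: if every SNE channel is 0.0 the dot product is 0.0
          * and SGE(-0.0, 0.0) yields 1.0; any mismatch makes the negated dot
          * product negative, so SGE yields 0.0.)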
          */
         src_reg sge_src = result_src;
         sge_src.negate = ~sge_src.negate;
         emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
      } else {
         emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         src_reg temp = get_temp(glsl_type::vec4_type);
         emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);

         /* After the dot-product, the value will be an integer on the
          * range [0,4].  Zero stays zero, and positive values become 1.0.
          */
         ir_to_mesa_instruction *const dp =
            emit_dp(ir, result_dst, temp, temp, vector_elements);
         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
            /* The clamping to [0,1] can be done for free in the fragment
             * shader with a saturate.
             */
            dp->saturate = true;
         } else {
            /* Negating the result of the dot-product gives values on the
             * range [-4, 0].  Zero stays zero, and negative values become
             * 1.0.  This is achieved using SLT.
             */
            src_reg slt_src = result_src;
            slt_src.negate = ~slt_src.negate;
            emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
         }
      } else {
         emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      }
      break;

   case ir_unop_any: {
      assert(ir->operands[0]->type->is_vector());

      /* After the dot-product, the value will be an integer on the
       * range [0,4].  Zero stays zero, and positive values become 1.0.
       */
      ir_to_mesa_instruction *const dp =
         emit_dp(ir, result_dst, op[0], op[0],
                 ir->operands[0]->type->vector_elements);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         /* The clamping to [0,1] can be done for free in the fragment
          * shader with a saturate.
          */
         dp->saturate = true;
      } else {
         /* Negating the result of the dot-product gives values on the range
          * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
          * is achieved using SLT.
          */
         src_reg slt_src = result_src;
         slt_src.negate = ~slt_src.negate;
         emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
      }
      break;
   }

   case ir_binop_logic_xor:
      emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or: {
      /* After the addition, the value will be an integer on the
       * range [0,2].  Zero stays zero, and positive values become 1.0.
       */
      ir_to_mesa_instruction *add =
         emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
         /* The clamping to [0,1] can be done for free in the fragment
          * shader with a saturate.
          */
         add->saturate = true;
      } else {
         /* Negating the result of the addition gives values on the range
          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
          * is achieved using SLT.
          */
         src_reg slt_src = result_src;
         slt_src.negate = ~slt_src.negate;
         emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
      }
      break;
   }

   case ir_binop_logic_and:
      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and".
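       * (Truth table, illustrative: 1.0 * 1.0 = 1.0, and any 0.0 operand
       * gives 0.0.)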
*/ 1371 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1372 break; 1373 1374 case ir_binop_dot: 1375 assert(ir->operands[0]->type->is_vector()); 1376 assert(ir->operands[0]->type == ir->operands[1]->type); 1377 emit_dp(ir, result_dst, op[0], op[1], 1378 ir->operands[0]->type->vector_elements); 1379 break; 1380 1381 case ir_unop_sqrt: 1382 /* sqrt(x) = x * rsq(x). */ 1383 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1384 emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); 1385 /* For incoming channels <= 0, set the result to 0. */ 1386 op[0].negate = ~op[0].negate; 1387 emit(ir, OPCODE_CMP, result_dst, 1388 op[0], result_src, src_reg_for_float(0.0)); 1389 break; 1390 case ir_unop_rsq: 1391 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1392 break; 1393 case ir_unop_i2f: 1394 case ir_unop_u2f: 1395 case ir_unop_b2f: 1396 case ir_unop_b2i: 1397 case ir_unop_i2u: 1398 case ir_unop_u2i: 1399 /* Mesa IR lacks types, ints are stored as truncated floats. */ 1400 result_src = op[0]; 1401 break; 1402 case ir_unop_f2i: 1403 case ir_unop_f2u: 1404 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1405 break; 1406 case ir_unop_f2b: 1407 case ir_unop_i2b: 1408 emit(ir, OPCODE_SNE, result_dst, 1409 op[0], src_reg_for_float(0.0)); 1410 break; 1411 case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway 1412 case ir_unop_bitcast_f2u: 1413 case ir_unop_bitcast_i2f: 1414 case ir_unop_bitcast_u2f: 1415 break; 1416 case ir_unop_trunc: 1417 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1418 break; 1419 case ir_unop_ceil: 1420 op[0].negate = ~op[0].negate; 1421 emit(ir, OPCODE_FLR, result_dst, op[0]); 1422 result_src.negate = ~result_src.negate; 1423 break; 1424 case ir_unop_floor: 1425 emit(ir, OPCODE_FLR, result_dst, op[0]); 1426 break; 1427 case ir_unop_fract: 1428 emit(ir, OPCODE_FRC, result_dst, op[0]); 1429 break; 1430 1431 case ir_binop_min: 1432 emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); 1433 break; 1434 case ir_binop_max: 1435 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 1436 break; 1437 case ir_binop_pow: 1438 emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); 1439 break; 1440 1441 /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since 1442 * hardware backends have no way to avoid Mesa IR generation 1443 * even if they don't use it, we need to emit "something" and 1444 * continue. 1445 */ 1446 case ir_binop_lshift: 1447 case ir_binop_rshift: 1448 case ir_binop_bit_and: 1449 case ir_binop_bit_xor: 1450 case ir_binop_bit_or: 1451 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1452 break; 1453 1454 case ir_unop_bit_not: 1455 case ir_unop_round_even: 1456 emit(ir, OPCODE_MOV, result_dst, op[0]); 1457 break; 1458 1459 case ir_binop_ubo_load: 1460 assert(!"not supported"); 1461 break; 1462 1463 case ir_quadop_vector: 1464 /* This operation should have already been handled. 1465 */ 1466 assert(!"Should not get here."); 1467 break; 1468 } 1469 1470 this->result = result_src; 1471 } 1472 1473 1474 void 1475 ir_to_mesa_visitor::visit(ir_swizzle *ir) 1476 { 1477 src_reg src; 1478 int i; 1479 int swizzle[4]; 1480 1481 /* Note that this is only swizzles in expressions, not those on the left 1482 * hand side of an assignment, which do write masking. See ir_assignment 1483 * for that. 
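    * For example (illustrative), reading v.zy only rewrites the swizzle
    * field of the src_reg below; no MOV instruction is emitted.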
1484 */ 1485 1486 ir->val->accept(this); 1487 src = this->result; 1488 assert(src.file != PROGRAM_UNDEFINED); 1489 1490 for (i = 0; i < 4; i++) { 1491 if (i < ir->type->vector_elements) { 1492 switch (i) { 1493 case 0: 1494 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1495 break; 1496 case 1: 1497 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1498 break; 1499 case 2: 1500 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1501 break; 1502 case 3: 1503 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1504 break; 1505 } 1506 } else { 1507 /* If the type is smaller than a vec4, replicate the last 1508 * channel out. 1509 */ 1510 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1511 } 1512 } 1513 1514 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1515 1516 this->result = src; 1517 } 1518 1519 void 1520 ir_to_mesa_visitor::visit(ir_dereference_variable *ir) 1521 { 1522 variable_storage *entry = find_variable_storage(ir->var); 1523 ir_variable *var = ir->var; 1524 1525 if (!entry) { 1526 switch (var->mode) { 1527 case ir_var_uniform: 1528 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1529 var->location); 1530 this->variables.push_tail(entry); 1531 break; 1532 case ir_var_in: 1533 case ir_var_inout: 1534 /* The linker assigns locations for varyings and attributes, 1535 * including deprecated builtins (like gl_Color), 1536 * user-assigned generic attributes (glBindVertexLocation), 1537 * and user-defined varyings. 1538 * 1539 * FINISHME: We would hit this path for function arguments. Fix! 1540 */ 1541 assert(var->location != -1); 1542 entry = new(mem_ctx) variable_storage(var, 1543 PROGRAM_INPUT, 1544 var->location); 1545 break; 1546 case ir_var_out: 1547 assert(var->location != -1); 1548 entry = new(mem_ctx) variable_storage(var, 1549 PROGRAM_OUTPUT, 1550 var->location); 1551 break; 1552 case ir_var_system_value: 1553 entry = new(mem_ctx) variable_storage(var, 1554 PROGRAM_SYSTEM_VALUE, 1555 var->location); 1556 break; 1557 case ir_var_auto: 1558 case ir_var_temporary: 1559 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1560 this->next_temp); 1561 this->variables.push_tail(entry); 1562 1563 next_temp += type_size(var->type); 1564 break; 1565 } 1566 1567 if (!entry) { 1568 printf("Failed to make storage for %s\n", var->name); 1569 exit(1); 1570 } 1571 } 1572 1573 this->result = src_reg(entry->file, entry->index, var->type); 1574 } 1575 1576 void 1577 ir_to_mesa_visitor::visit(ir_dereference_array *ir) 1578 { 1579 ir_constant *index; 1580 src_reg src; 1581 int element_size = type_size(ir->type); 1582 1583 index = ir->array_index->constant_expression_value(); 1584 1585 ir->array->accept(this); 1586 src = this->result; 1587 1588 if (index) { 1589 src.index += index->value.i[0] * element_size; 1590 } else { 1591 /* Variable index array dereference. It eats the "vec4" of the 1592 * base of the array and an index that offsets the Mesa register 1593 * index. 1594 */ 1595 ir->array_index->accept(this); 1596 1597 src_reg index_reg; 1598 1599 if (element_size == 1) { 1600 index_reg = this->result; 1601 } else { 1602 index_reg = get_temp(glsl_type::float_type); 1603 1604 emit(ir, OPCODE_MUL, dst_reg(index_reg), 1605 this->result, src_reg_for_float(element_size)); 1606 } 1607 1608 /* If there was already a relative address register involved, add the 1609 * new and the old together to get the new offset. 
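       * (This happens, illustratively, when dereferencing an array of
       * matrices as m[i][j] with both indices variable: the outer index is
       * already in src.reladdr when the column index arrives here.)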
       */
      if (src.reladdr != NULL) {
         src_reg accum_reg = get_temp(glsl_type::float_type);

         emit(ir, OPCODE_ADD, dst_reg(accum_reg),
              index_reg, *src.reladdr);

         index_reg = accum_reg;
      }

      src.reladdr = ralloc(mem_ctx, src_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = SWIZZLE_NOOP;

   this->result = src;
}

void
ir_to_mesa_visitor::visit(ir_dereference_record *ir)
{
   unsigned int i;
   const glsl_type *struct_type = ir->record->type;
   int offset = 0;

   ir->record->accept(this);

   for (i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = SWIZZLE_NOOP;

   this->result.index += offset;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static dst_reg
get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We'll ignore
    * swizzles in it and write swizzles using writemask, though.
    */
   ir->accept(v);
   return dst_reg(v->result);
}

/**
 * Process the condition of a conditional assignment
 *
 * Examines the condition of a conditional assignment to generate the optimal
 * first operand of a \c CMP instruction.  If the condition is a relational
 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
 * used as the source for the \c CMP instruction.  Otherwise the comparison
 * is processed to a boolean result, and the boolean result is used as the
 * operand to the CMP instruction.
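 *
 * For example (illustrative), a condition written as (x >= 0.0) lets x feed
 * the CMP directly (negated and/or with the move operands swapped, per the
 * table below), while a condition such as (x < y) is first evaluated to a
 * 0.0/1.0 value.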
 */
bool
ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
{
   ir_rvalue *src_ir = ir;
   bool negate = true;
   bool switch_order = false;

   ir_expression *const expr = ir->as_expression();
   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
      bool zero_on_left = false;

      if (expr->operands[0]->is_zero()) {
         src_ir = expr->operands[1];
         zero_on_left = true;
      } else if (expr->operands[1]->is_zero()) {
         src_ir = expr->operands[0];
         zero_on_left = false;
      }

      /*      a is -   0   +            -   0   +
       * (a <  0)   T   F   F   ( a < 0)  T   F   F
       * (0 <  a)   F   F   T   (-a < 0)  F   F   T
       * (a <= 0)   T   T   F   (-a < 0)  F   F   T  (swap order of other operands)
       * (0 <= a)   F   T   T   ( a < 0)  T   F   F  (swap order of other operands)
       * (a >  0)   F   F   T   (-a < 0)  F   F   T
       * (0 >  a)   T   F   F   ( a < 0)  T   F   F
       * (a >= 0)   F   T   T   ( a < 0)  T   F   F  (swap order of other operands)
       * (0 >= a)   T   T   F   (-a < 0)  F   F   T  (swap order of other operands)
       *
       * Note that exchanging the order of 0 and 'a' in the comparison simply
       * means that the value of 'a' should be negated.
       */
      if (src_ir != ir) {
         switch (expr->operation) {
         case ir_binop_less:
            switch_order = false;
            negate = zero_on_left;
            break;

         case ir_binop_greater:
            switch_order = false;
            negate = !zero_on_left;
            break;

         case ir_binop_lequal:
            switch_order = true;
            negate = !zero_on_left;
            break;

         case ir_binop_gequal:
            switch_order = true;
            negate = zero_on_left;
            break;

         default:
            /* This isn't the right kind of comparison after all, so make
             * sure the whole condition is visited.
             */
            src_ir = ir;
            break;
         }
      }
   }

   src_ir->accept(this);

   /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
    * choose which value OPCODE_CMP produces without an extra instruction
    * computing the condition.
    */
   if (negate)
      this->result.negate = ~this->result.negate;

   return switch_order;
}

void
ir_to_mesa_visitor::visit(ir_assignment *ir)
{
   dst_reg l;
   src_reg r;
   int i;

   ir->rhs->accept(this);
   r = this->result;

   l = get_assignment_lhs(ir->lhs, this);

   /* FINISHME: This should really be set to the correct maximal writemask
    * FINISHME: for each component written (in the loops below).  This case
    * FINISHME: can only occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      l.writemask = WRITEMASK_XYZW;
   } else if (ir->lhs->type->is_scalar()) {
      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
       * FINISHME: W component of fragment shader output zero, work correctly.
       */
      l.writemask = WRITEMASK_XYZW;
   } else {
      int swizzles[4];
      int first_enabled_chan = 0;
      int rhs_chan = 0;

      assert(ir->lhs->type->is_vector());
      l.writemask = ir->write_mask;

      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i)) {
            first_enabled_chan = GET_SWZ(r.swizzle, i);
            break;
         }
      }

      /* Swizzle a small RHS vector into the channels being written.
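       * For example (illustrative), v.zw = u.xy has write_mask ZW; the MOV
       * emitted below keeps writemask ZW and rewrites the RHS swizzle so the
       * Z channel reads u.x and the W channel reads u.y.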
1809 * 1810 * glsl ir treats write_mask as dictating how many channels are 1811 * present on the RHS while Mesa IR treats write_mask as just 1812 * showing which channels of the vec4 RHS get written. 1813 */ 1814 for (int i = 0; i < 4; i++) { 1815 if (l.writemask & (1 << i)) 1816 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 1817 else 1818 swizzles[i] = first_enabled_chan; 1819 } 1820 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 1821 swizzles[2], swizzles[3]); 1822 } 1823 1824 assert(l.file != PROGRAM_UNDEFINED); 1825 assert(r.file != PROGRAM_UNDEFINED); 1826 1827 if (ir->condition) { 1828 const bool switch_order = this->process_move_condition(ir->condition); 1829 src_reg condition = this->result; 1830 1831 for (i = 0; i < type_size(ir->lhs->type); i++) { 1832 if (switch_order) { 1833 emit(ir, OPCODE_CMP, l, condition, src_reg(l), r); 1834 } else { 1835 emit(ir, OPCODE_CMP, l, condition, r, src_reg(l)); 1836 } 1837 1838 l.index++; 1839 r.index++; 1840 } 1841 } else { 1842 for (i = 0; i < type_size(ir->lhs->type); i++) { 1843 emit(ir, OPCODE_MOV, l, r); 1844 l.index++; 1845 r.index++; 1846 } 1847 } 1848 } 1849 1850 1851 void 1852 ir_to_mesa_visitor::visit(ir_constant *ir) 1853 { 1854 src_reg src; 1855 GLfloat stack_vals[4] = { 0 }; 1856 GLfloat *values = stack_vals; 1857 unsigned int i; 1858 1859 /* Unfortunately, 4 floats is all we can get into 1860 * _mesa_add_unnamed_constant. So, make a temp to store an 1861 * aggregate constant and move each constant value into it. If we 1862 * get lucky, copy propagation will eliminate the extra moves. 1863 */ 1864 1865 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 1866 src_reg temp_base = get_temp(ir->type); 1867 dst_reg temp = dst_reg(temp_base); 1868 1869 foreach_iter(exec_list_iterator, iter, ir->components) { 1870 ir_constant *field_value = (ir_constant *)iter.get(); 1871 int size = type_size(field_value->type); 1872 1873 assert(size > 0); 1874 1875 field_value->accept(this); 1876 src = this->result; 1877 1878 for (i = 0; i < (unsigned int)size; i++) { 1879 emit(ir, OPCODE_MOV, temp, src); 1880 1881 src.index++; 1882 temp.index++; 1883 } 1884 } 1885 this->result = temp_base; 1886 return; 1887 } 1888 1889 if (ir->type->is_array()) { 1890 src_reg temp_base = get_temp(ir->type); 1891 dst_reg temp = dst_reg(temp_base); 1892 int size = type_size(ir->type->fields.array); 1893 1894 assert(size > 0); 1895 1896 for (i = 0; i < ir->type->length; i++) { 1897 ir->array_elements[i]->accept(this); 1898 src = this->result; 1899 for (int j = 0; j < size; j++) { 1900 emit(ir, OPCODE_MOV, temp, src); 1901 1902 src.index++; 1903 temp.index++; 1904 } 1905 } 1906 this->result = temp_base; 1907 return; 1908 } 1909 1910 if (ir->type->is_matrix()) { 1911 src_reg mat = get_temp(ir->type); 1912 dst_reg mat_column = dst_reg(mat); 1913 1914 for (i = 0; i < ir->type->matrix_columns; i++) { 1915 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 1916 values = &ir->value.f[i * ir->type->vector_elements]; 1917 1918 src = src_reg(PROGRAM_CONSTANT, -1, NULL); 1919 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1920 (gl_constant_value *) values, 1921 ir->type->vector_elements, 1922 &src.swizzle); 1923 emit(ir, OPCODE_MOV, mat_column, src); 1924 1925 mat_column.index++; 1926 } 1927 1928 this->result = mat; 1929 return; 1930 } 1931 1932 src.file = PROGRAM_CONSTANT; 1933 switch (ir->type->base_type) { 1934 case GLSL_TYPE_FLOAT: 1935 values = &ir->value.f[0]; 1936 break; 1937 case GLSL_TYPE_UINT: 1938 for (i = 0; i < ir->type->vector_elements; i++) { 1939 
values[i] = ir->value.u[i]; 1940 } 1941 break; 1942 case GLSL_TYPE_INT: 1943 for (i = 0; i < ir->type->vector_elements; i++) { 1944 values[i] = ir->value.i[i]; 1945 } 1946 break; 1947 case GLSL_TYPE_BOOL: 1948 for (i = 0; i < ir->type->vector_elements; i++) { 1949 values[i] = ir->value.b[i]; 1950 } 1951 break; 1952 default: 1953 assert(!"Non-float/uint/int/bool constant"); 1954 } 1955 1956 this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); 1957 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1958 (gl_constant_value *) values, 1959 ir->type->vector_elements, 1960 &this->result.swizzle); 1961 } 1962 1963 void 1964 ir_to_mesa_visitor::visit(ir_call *ir) 1965 { 1966 assert(!"ir_to_mesa: All function calls should have been inlined by now."); 1967 } 1968 1969 void 1970 ir_to_mesa_visitor::visit(ir_texture *ir) 1971 { 1972 src_reg result_src, coord, lod_info, projector, dx, dy; 1973 dst_reg result_dst, coord_dst; 1974 ir_to_mesa_instruction *inst = NULL; 1975 prog_opcode opcode = OPCODE_NOP; 1976 1977 if (ir->op == ir_txs) 1978 this->result = src_reg_for_float(0.0); 1979 else 1980 ir->coordinate->accept(this); 1981 1982 /* Put our coords in a temp. We'll need to modify them for shadow, 1983 * projection, or LOD, so the only case we'd use it as is is if 1984 * we're doing plain old texturing. Mesa IR optimization should 1985 * handle cleaning up our mess in that case. 1986 */ 1987 coord = get_temp(glsl_type::vec4_type); 1988 coord_dst = dst_reg(coord); 1989 emit(ir, OPCODE_MOV, coord_dst, this->result); 1990 1991 if (ir->projector) { 1992 ir->projector->accept(this); 1993 projector = this->result; 1994 } 1995 1996 /* Storage for our result. Ideally for an assignment we'd be using 1997 * the actual storage for the result here, instead. 1998 */ 1999 result_src = get_temp(glsl_type::vec4_type); 2000 result_dst = dst_reg(result_src); 2001 2002 switch (ir->op) { 2003 case ir_tex: 2004 case ir_txs: 2005 opcode = OPCODE_TEX; 2006 break; 2007 case ir_txb: 2008 opcode = OPCODE_TXB; 2009 ir->lod_info.bias->accept(this); 2010 lod_info = this->result; 2011 break; 2012 case ir_txf: 2013 /* Pretend to be TXL so the sampler, coordinate, lod are available */ 2014 case ir_txl: 2015 opcode = OPCODE_TXL; 2016 ir->lod_info.lod->accept(this); 2017 lod_info = this->result; 2018 break; 2019 case ir_txd: 2020 opcode = OPCODE_TXD; 2021 ir->lod_info.grad.dPdx->accept(this); 2022 dx = this->result; 2023 ir->lod_info.grad.dPdy->accept(this); 2024 dy = this->result; 2025 break; 2026 } 2027 2028 const glsl_type *sampler_type = ir->sampler->type; 2029 2030 if (ir->projector) { 2031 if (opcode == OPCODE_TEX) { 2032 /* Slot the projector in as the last component of the coord. */ 2033 coord_dst.writemask = WRITEMASK_W; 2034 emit(ir, OPCODE_MOV, coord_dst, projector); 2035 coord_dst.writemask = WRITEMASK_XYZW; 2036 opcode = OPCODE_TXP; 2037 } else { 2038 src_reg coord_w = coord; 2039 coord_w.swizzle = SWIZZLE_WWWW; 2040 2041 /* For the other TEX opcodes there's no projective version 2042 * since the last slot is taken up by lod info. Do the 2043 * projective divide now. 2044 */ 2045 coord_dst.writemask = WRITEMASK_W; 2046 emit(ir, OPCODE_RCP, coord_dst, projector); 2047 2048 /* In the case where we have to project the coordinates "by hand," 2049 * the shadow comparitor value must also be projected. 2050 */ 2051 src_reg tmp_src = coord; 2052 if (ir->shadow_comparitor) { 2053 /* Slot the shadow value in as the second to last component of the 2054 * coord. 
2055 */ 2056 ir->shadow_comparitor->accept(this); 2057 2058 tmp_src = get_temp(glsl_type::vec4_type); 2059 dst_reg tmp_dst = dst_reg(tmp_src); 2060 2061 /* Projective division not allowed for array samplers. */ 2062 assert(!sampler_type->sampler_array); 2063 2064 tmp_dst.writemask = WRITEMASK_Z; 2065 emit(ir, OPCODE_MOV, tmp_dst, this->result); 2066 2067 tmp_dst.writemask = WRITEMASK_XY; 2068 emit(ir, OPCODE_MOV, tmp_dst, coord); 2069 } 2070 2071 coord_dst.writemask = WRITEMASK_XYZ; 2072 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); 2073 2074 coord_dst.writemask = WRITEMASK_XYZW; 2075 coord.swizzle = SWIZZLE_XYZW; 2076 } 2077 } 2078 2079 /* If projection is done and the opcode is not OPCODE_TXP, then the shadow 2080 * comparitor was put in the correct place (and projected) by the code, 2081 * above, that handles by-hand projection. 2082 */ 2083 if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) { 2084 /* Slot the shadow value in as the second to last component of the 2085 * coord. 2086 */ 2087 ir->shadow_comparitor->accept(this); 2088 2089 /* XXX This will need to be updated for cubemap array samplers. */ 2090 if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2091 sampler_type->sampler_array) { 2092 coord_dst.writemask = WRITEMASK_W; 2093 } else { 2094 coord_dst.writemask = WRITEMASK_Z; 2095 } 2096 2097 emit(ir, OPCODE_MOV, coord_dst, this->result); 2098 coord_dst.writemask = WRITEMASK_XYZW; 2099 } 2100 2101 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { 2102 /* Mesa IR stores lod or lod bias in the last channel of the coords. */ 2103 coord_dst.writemask = WRITEMASK_W; 2104 emit(ir, OPCODE_MOV, coord_dst, lod_info); 2105 coord_dst.writemask = WRITEMASK_XYZW; 2106 } 2107 2108 if (opcode == OPCODE_TXD) 2109 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2110 else 2111 inst = emit(ir, opcode, result_dst, coord); 2112 2113 if (ir->shadow_comparitor) 2114 inst->tex_shadow = GL_TRUE; 2115 2116 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2117 this->shader_program, 2118 this->prog); 2119 2120 switch (sampler_type->sampler_dimensionality) { 2121 case GLSL_SAMPLER_DIM_1D: 2122 inst->tex_target = (sampler_type->sampler_array) 2123 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2124 break; 2125 case GLSL_SAMPLER_DIM_2D: 2126 inst->tex_target = (sampler_type->sampler_array) 2127 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2128 break; 2129 case GLSL_SAMPLER_DIM_3D: 2130 inst->tex_target = TEXTURE_3D_INDEX; 2131 break; 2132 case GLSL_SAMPLER_DIM_CUBE: 2133 inst->tex_target = TEXTURE_CUBE_INDEX; 2134 break; 2135 case GLSL_SAMPLER_DIM_RECT: 2136 inst->tex_target = TEXTURE_RECT_INDEX; 2137 break; 2138 case GLSL_SAMPLER_DIM_BUF: 2139 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2140 break; 2141 case GLSL_SAMPLER_DIM_EXTERNAL: 2142 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 2143 break; 2144 default: 2145 assert(!"Should not get here."); 2146 } 2147 2148 this->result = result_src; 2149 } 2150 2151 void 2152 ir_to_mesa_visitor::visit(ir_return *ir) 2153 { 2154 /* Non-void functions should have been inlined. We may still emit RETs 2155 * from main() unless the EmitNoMainReturn option is set. 
2156 */ 2157 assert(!ir->get_value()); 2158 emit(ir, OPCODE_RET); 2159 } 2160 2161 void 2162 ir_to_mesa_visitor::visit(ir_discard *ir) 2163 { 2164 if (ir->condition) { 2165 ir->condition->accept(this); 2166 this->result.negate = ~this->result.negate; 2167 emit(ir, OPCODE_KIL, undef_dst, this->result); 2168 } else { 2169 emit(ir, OPCODE_KIL_NV); 2170 } 2171 } 2172 2173 void 2174 ir_to_mesa_visitor::visit(ir_if *ir) 2175 { 2176 ir_to_mesa_instruction *cond_inst, *if_inst; 2177 ir_to_mesa_instruction *prev_inst; 2178 2179 prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); 2180 2181 ir->condition->accept(this); 2182 assert(this->result.file != PROGRAM_UNDEFINED); 2183 2184 if (this->options->EmitCondCodes) { 2185 cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail(); 2186 2187 /* See if we actually generated any instruction for generating 2188 * the condition. If not, then cook up a move to a temp so we 2189 * have something to set cond_update on. 2190 */ 2191 if (cond_inst == prev_inst) { 2192 src_reg temp = get_temp(glsl_type::bool_type); 2193 cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result); 2194 } 2195 cond_inst->cond_update = GL_TRUE; 2196 2197 if_inst = emit(ir->condition, OPCODE_IF); 2198 if_inst->dst.cond_mask = COND_NE; 2199 } else { 2200 if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); 2201 } 2202 2203 this->instructions.push_tail(if_inst); 2204 2205 visit_exec_list(&ir->then_instructions, this); 2206 2207 if (!ir->else_instructions.is_empty()) { 2208 emit(ir->condition, OPCODE_ELSE); 2209 visit_exec_list(&ir->else_instructions, this); 2210 } 2211 2212 if_inst = emit(ir->condition, OPCODE_ENDIF); 2213 } 2214 2215 ir_to_mesa_visitor::ir_to_mesa_visitor() 2216 { 2217 result.file = PROGRAM_UNDEFINED; 2218 next_temp = 1; 2219 next_signature_id = 1; 2220 current_function = NULL; 2221 mem_ctx = ralloc_context(NULL); 2222 } 2223 2224 ir_to_mesa_visitor::~ir_to_mesa_visitor() 2225 { 2226 ralloc_free(mem_ctx); 2227 } 2228 2229 static struct prog_src_register 2230 mesa_src_reg_from_ir_src_reg(src_reg reg) 2231 { 2232 struct prog_src_register mesa_reg; 2233 2234 mesa_reg.File = reg.file; 2235 assert(reg.index < (1 << INST_INDEX_BITS)); 2236 mesa_reg.Index = reg.index; 2237 mesa_reg.Swizzle = reg.swizzle; 2238 mesa_reg.RelAddr = reg.reladdr != NULL; 2239 mesa_reg.Negate = reg.negate; 2240 mesa_reg.Abs = 0; 2241 mesa_reg.HasIndex2 = GL_FALSE; 2242 mesa_reg.RelAddr2 = 0; 2243 mesa_reg.Index2 = 0; 2244 2245 return mesa_reg; 2246 } 2247 2248 static void 2249 set_branchtargets(ir_to_mesa_visitor *v, 2250 struct prog_instruction *mesa_instructions, 2251 int num_instructions) 2252 { 2253 int if_count = 0, loop_count = 0; 2254 int *if_stack, *loop_stack; 2255 int if_stack_pos = 0, loop_stack_pos = 0; 2256 int i, j; 2257 2258 for (i = 0; i < num_instructions; i++) { 2259 switch (mesa_instructions[i].Opcode) { 2260 case OPCODE_IF: 2261 if_count++; 2262 break; 2263 case OPCODE_BGNLOOP: 2264 loop_count++; 2265 break; 2266 case OPCODE_BRK: 2267 case OPCODE_CONT: 2268 mesa_instructions[i].BranchTarget = -1; 2269 break; 2270 default: 2271 break; 2272 } 2273 } 2274 2275 if_stack = rzalloc_array(v->mem_ctx, int, if_count); 2276 loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); 2277 2278 for (i = 0; i < num_instructions; i++) { 2279 switch (mesa_instructions[i].Opcode) { 2280 case OPCODE_IF: 2281 if_stack[if_stack_pos] = i; 2282 if_stack_pos++; 2283 break; 2284 case OPCODE_ELSE: 2285 mesa_instructions[if_stack[if_stack_pos - 
1]].BranchTarget = i; 2286 if_stack[if_stack_pos - 1] = i; 2287 break; 2288 case OPCODE_ENDIF: 2289 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; 2290 if_stack_pos--; 2291 break; 2292 case OPCODE_BGNLOOP: 2293 loop_stack[loop_stack_pos] = i; 2294 loop_stack_pos++; 2295 break; 2296 case OPCODE_ENDLOOP: 2297 loop_stack_pos--; 2298 /* Rewrite any breaks/conts at this nesting level (haven't 2299 * already had a BranchTarget assigned) to point to the end 2300 * of the loop. 2301 */ 2302 for (j = loop_stack[loop_stack_pos]; j < i; j++) { 2303 if (mesa_instructions[j].Opcode == OPCODE_BRK || 2304 mesa_instructions[j].Opcode == OPCODE_CONT) { 2305 if (mesa_instructions[j].BranchTarget == -1) { 2306 mesa_instructions[j].BranchTarget = i; 2307 } 2308 } 2309 } 2310 /* The loop ends point at each other. */ 2311 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; 2312 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; 2313 break; 2314 case OPCODE_CAL: 2315 foreach_iter(exec_list_iterator, iter, v->function_signatures) { 2316 function_entry *entry = (function_entry *)iter.get(); 2317 2318 if (entry->sig_id == mesa_instructions[i].BranchTarget) { 2319 mesa_instructions[i].BranchTarget = entry->inst; 2320 break; 2321 } 2322 } 2323 break; 2324 default: 2325 break; 2326 } 2327 } 2328 } 2329 2330 static void 2331 print_program(struct prog_instruction *mesa_instructions, 2332 ir_instruction **mesa_instruction_annotation, 2333 int num_instructions) 2334 { 2335 ir_instruction *last_ir = NULL; 2336 int i; 2337 int indent = 0; 2338 2339 for (i = 0; i < num_instructions; i++) { 2340 struct prog_instruction *mesa_inst = mesa_instructions + i; 2341 ir_instruction *ir = mesa_instruction_annotation[i]; 2342 2343 fprintf(stdout, "%3d: ", i); 2344 2345 if (last_ir != ir && ir) { 2346 int j; 2347 2348 for (j = 0; j < indent; j++) { 2349 fprintf(stdout, " "); 2350 } 2351 ir->print(); 2352 printf("\n"); 2353 last_ir = ir; 2354 2355 fprintf(stdout, " "); /* line number spacing. 
*/ 2356 } 2357 2358 indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, 2359 PROG_PRINT_DEBUG, NULL); 2360 } 2361 } 2362 2363 class add_uniform_to_shader : public uniform_field_visitor { 2364 public: 2365 add_uniform_to_shader(struct gl_shader_program *shader_program, 2366 struct gl_program_parameter_list *params) 2367 : shader_program(shader_program), params(params), idx(-1) 2368 { 2369 /* empty */ 2370 } 2371 2372 void process(ir_variable *var) 2373 { 2374 this->idx = -1; 2375 this->uniform_field_visitor::process(var); 2376 2377 var->location = this->idx; 2378 } 2379 2380 private: 2381 virtual void visit_field(const glsl_type *type, const char *name); 2382 2383 struct gl_shader_program *shader_program; 2384 struct gl_program_parameter_list *params; 2385 int idx; 2386 }; 2387 2388 void 2389 add_uniform_to_shader::visit_field(const glsl_type *type, const char *name) 2390 { 2391 unsigned int size; 2392 2393 if (type->is_vector() || type->is_scalar()) { 2394 size = type->vector_elements; 2395 } else { 2396 size = type_size(type) * 4; 2397 } 2398 2399 gl_register_file file; 2400 if (type->is_sampler() || 2401 (type->is_array() && type->fields.array->is_sampler())) { 2402 file = PROGRAM_SAMPLER; 2403 } else { 2404 file = PROGRAM_UNIFORM; 2405 } 2406 2407 int index = _mesa_lookup_parameter_index(params, -1, name); 2408 if (index < 0) { 2409 index = _mesa_add_parameter(params, file, name, size, type->gl_type, 2410 NULL, NULL, 0x0); 2411 2412 /* Sampler uniform values are stored in prog->SamplerUnits, 2413 * and the entry in that array is selected by this index we 2414 * store in ParameterValues[]. 2415 */ 2416 if (file == PROGRAM_SAMPLER) { 2417 unsigned location; 2418 const bool found = 2419 this->shader_program->UniformHash->get(location, 2420 params->Parameters[index].Name); 2421 assert(found); 2422 2423 if (!found) 2424 return; 2425 2426 struct gl_uniform_storage *storage = 2427 &this->shader_program->UniformStorage[location]; 2428 2429 for (unsigned int j = 0; j < size / 4; j++) 2430 params->ParameterValues[index + j][0].f = storage->sampler + j; 2431 } 2432 } 2433 2434 /* The first part of the uniform that's processed determines the base 2435 * location of the whole uniform (for structures). 2436 */ 2437 if (this->idx < 0) 2438 this->idx = index; 2439 } 2440 2441 /** 2442 * Generate the program parameters list for the user uniforms in a shader 2443 * 2444 * \param shader_program Linked shader program. This is only used to 2445 * emit possible link errors to the info log. 2446 * \param sh Shader whose uniforms are to be processed. 2447 * \param params Parameter list to be filled in. 2448 */ 2449 void 2450 _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program 2451 *shader_program, 2452 struct gl_shader *sh, 2453 struct gl_program_parameter_list 2454 *params) 2455 { 2456 add_uniform_to_shader add(shader_program, params); 2457 2458 foreach_list(node, sh->ir) { 2459 ir_variable *var = ((ir_instruction *) node)->as_variable(); 2460 2461 if ((var == NULL) || (var->mode != ir_var_uniform) 2462 || var->uniform_block != -1 || (strncmp(var->name, "gl_", 3) == 0)) 2463 continue; 2464 2465 add.process(var); 2466 } 2467 } 2468 2469 void 2470 _mesa_associate_uniform_storage(struct gl_context *ctx, 2471 struct gl_shader_program *shader_program, 2472 struct gl_program_parameter_list *params) 2473 { 2474 /* After adding each uniform to the parameter list, connect the storage for 2475 * the parameter with the tracking structure used by the API for the 2476 * uniform. 
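 * The gl_uniform_driver_format chosen below tells the core uniform code how
 * API-side values have to be converted when they are copied into
 * ParameterValues[]; uniform_int_float, for example, means integer uniforms
 * are stored as floats because the context lacks native integer support.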
2477 */
2478 unsigned last_location = unsigned(~0);
2479 for (unsigned i = 0; i < params->NumParameters; i++) {
2480 if (params->Parameters[i].Type != PROGRAM_UNIFORM)
2481 continue;
2482
2483 unsigned location;
2484 const bool found =
2485 shader_program->UniformHash->get(location, params->Parameters[i].Name);
2486 assert(found);
2487
2488 if (!found)
2489 continue;
2490
2491 if (location != last_location) {
2492 struct gl_uniform_storage *storage =
2493 &shader_program->UniformStorage[location];
2494 enum gl_uniform_driver_format format = uniform_native;
2495
2496 unsigned columns = 0;
2497 switch (storage->type->base_type) {
2498 case GLSL_TYPE_UINT:
2499 assert(ctx->Const.NativeIntegers);
2500 format = uniform_native;
2501 columns = 1;
2502 break;
2503 case GLSL_TYPE_INT:
2504 format =
2505 (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
2506 columns = 1;
2507 break;
2508 case GLSL_TYPE_FLOAT:
2509 format = uniform_native;
2510 columns = storage->type->matrix_columns;
2511 break;
2512 case GLSL_TYPE_BOOL:
2513 if (ctx->Const.NativeIntegers) {
2514 format = (ctx->Const.UniformBooleanTrue == 1)
2515 ? uniform_bool_int_0_1 : uniform_bool_int_0_not0;
2516 } else {
2517 format = uniform_bool_float;
2518 }
2519 columns = 1;
2520 break;
2521 case GLSL_TYPE_SAMPLER:
2522 format = uniform_native;
2523 columns = 1;
2524 break;
2525 default:
2526 assert(!"Should not get here.");
2527 break;
2528 }
2529
2530 _mesa_uniform_attach_driver_storage(storage,
2531 4 * sizeof(float) * columns,
2532 4 * sizeof(float),
2533 format,
2534 &params->ParameterValues[i]);
2535
2536 /* After attaching the driver's storage to the uniform, propagate any
2537 * data from the linker's backing store. This will cause values from
2538 * initializers in the source code to be copied over.
2539 */
2540 _mesa_propagate_uniforms_to_driver_storage(storage,
2541 0,
2542 MAX2(1, storage->array_elements));
2543
2544 last_location = location;
2545 }
2546 }
2547 }
2548
2549 /*
2550 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
2551 * channels for copy propagation and updates following instructions to
2552 * use the original versions.
2553 *
2554 * The ir_to_mesa_visitor lazily produces code assuming that this pass
2555 * will occur. As an example, a TXP production before this pass:
2556 *
2557 * 0: MOV TEMP[1], INPUT[4].xyyy;
2558 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2559 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
2560 *
2561 * and after:
2562 *
2563 * 0: MOV TEMP[1], INPUT[4].xyyy;
2564 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2565 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
2566 *
2567 * which allows for dead code elimination on TEMP[1]'s writes.
2568 */
2569 void
2570 ir_to_mesa_visitor::copy_propagate(void)
2571 {
2572 ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
2573 ir_to_mesa_instruction *,
2574 this->next_temp * 4);
2575 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
2576 int level = 0;
2577
2578 foreach_iter(exec_list_iterator, iter, this->instructions) {
2579 ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2580
2581 assert(inst->dst.file != PROGRAM_TEMPORARY
2582 || inst->dst.index < this->next_temp);
2583
2584 /* First, do any copy propagation possible into the src regs.
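 * The ACP is indexed by (temporary index * 4 + channel); each entry points
 * at the MOV that last wrote that channel. A swizzled read is only rewritten
 * when every referenced channel has an entry and all of those entries read
 * from the same register.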
*/ 2585 for (int r = 0; r < 3; r++) { 2586 ir_to_mesa_instruction *first = NULL; 2587 bool good = true; 2588 int acp_base = inst->src[r].index * 4; 2589 2590 if (inst->src[r].file != PROGRAM_TEMPORARY || 2591 inst->src[r].reladdr) 2592 continue; 2593 2594 /* See if we can find entries in the ACP consisting of MOVs 2595 * from the same src register for all the swizzled channels 2596 * of this src register reference. 2597 */ 2598 for (int i = 0; i < 4; i++) { 2599 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 2600 ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan]; 2601 2602 if (!copy_chan) { 2603 good = false; 2604 break; 2605 } 2606 2607 assert(acp_level[acp_base + src_chan] <= level); 2608 2609 if (!first) { 2610 first = copy_chan; 2611 } else { 2612 if (first->src[0].file != copy_chan->src[0].file || 2613 first->src[0].index != copy_chan->src[0].index) { 2614 good = false; 2615 break; 2616 } 2617 } 2618 } 2619 2620 if (good) { 2621 /* We've now validated that we can copy-propagate to 2622 * replace this src register reference. Do it. 2623 */ 2624 inst->src[r].file = first->src[0].file; 2625 inst->src[r].index = first->src[0].index; 2626 2627 int swizzle = 0; 2628 for (int i = 0; i < 4; i++) { 2629 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 2630 ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan]; 2631 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 2632 (3 * i)); 2633 } 2634 inst->src[r].swizzle = swizzle; 2635 } 2636 } 2637 2638 switch (inst->op) { 2639 case OPCODE_BGNLOOP: 2640 case OPCODE_ENDLOOP: 2641 /* End of a basic block, clear the ACP entirely. */ 2642 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 2643 break; 2644 2645 case OPCODE_IF: 2646 ++level; 2647 break; 2648 2649 case OPCODE_ENDIF: 2650 case OPCODE_ELSE: 2651 /* Clear all channels written inside the block from the ACP, but 2652 * leaving those that were not touched. 2653 */ 2654 for (int r = 0; r < this->next_temp; r++) { 2655 for (int c = 0; c < 4; c++) { 2656 if (!acp[4 * r + c]) 2657 continue; 2658 2659 if (acp_level[4 * r + c] >= level) 2660 acp[4 * r + c] = NULL; 2661 } 2662 } 2663 if (inst->op == OPCODE_ENDIF) 2664 --level; 2665 break; 2666 2667 default: 2668 /* Continuing the block, clear any written channels from 2669 * the ACP. 2670 */ 2671 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 2672 /* Any temporary might be written, so no copy propagation 2673 * across this instruction. 2674 */ 2675 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 2676 } else if (inst->dst.file == PROGRAM_OUTPUT && 2677 inst->dst.reladdr) { 2678 /* Any output might be written, so no copy propagation 2679 * from outputs across this instruction. 2680 */ 2681 for (int r = 0; r < this->next_temp; r++) { 2682 for (int c = 0; c < 4; c++) { 2683 if (!acp[4 * r + c]) 2684 continue; 2685 2686 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 2687 acp[4 * r + c] = NULL; 2688 } 2689 } 2690 } else if (inst->dst.file == PROGRAM_TEMPORARY || 2691 inst->dst.file == PROGRAM_OUTPUT) { 2692 /* Clear where it's used as dst. */ 2693 if (inst->dst.file == PROGRAM_TEMPORARY) { 2694 for (int c = 0; c < 4; c++) { 2695 if (inst->dst.writemask & (1 << c)) { 2696 acp[4 * inst->dst.index + c] = NULL; 2697 } 2698 } 2699 } 2700 2701 /* Clear where it's used as src. 
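 * An entry is dropped when this instruction overwrites the channel that the
 * recorded MOV was reading from.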
*/ 2702 for (int r = 0; r < this->next_temp; r++) { 2703 for (int c = 0; c < 4; c++) { 2704 if (!acp[4 * r + c]) 2705 continue; 2706 2707 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 2708 2709 if (acp[4 * r + c]->src[0].file == inst->dst.file && 2710 acp[4 * r + c]->src[0].index == inst->dst.index && 2711 inst->dst.writemask & (1 << src_chan)) 2712 { 2713 acp[4 * r + c] = NULL; 2714 } 2715 } 2716 } 2717 } 2718 break; 2719 } 2720 2721 /* If this is a copy, add it to the ACP. */ 2722 if (inst->op == OPCODE_MOV && 2723 inst->dst.file == PROGRAM_TEMPORARY && 2724 !inst->dst.reladdr && 2725 !inst->saturate && 2726 !inst->src[0].reladdr && 2727 !inst->src[0].negate) { 2728 for (int i = 0; i < 4; i++) { 2729 if (inst->dst.writemask & (1 << i)) { 2730 acp[4 * inst->dst.index + i] = inst; 2731 acp_level[4 * inst->dst.index + i] = level; 2732 } 2733 } 2734 } 2735 } 2736 2737 ralloc_free(acp_level); 2738 ralloc_free(acp); 2739 } 2740 2741 2742 /** 2743 * Convert a shader's GLSL IR into a Mesa gl_program. 2744 */ 2745 static struct gl_program * 2746 get_mesa_program(struct gl_context *ctx, 2747 struct gl_shader_program *shader_program, 2748 struct gl_shader *shader) 2749 { 2750 ir_to_mesa_visitor v; 2751 struct prog_instruction *mesa_instructions, *mesa_inst; 2752 ir_instruction **mesa_instruction_annotation; 2753 int i; 2754 struct gl_program *prog; 2755 GLenum target; 2756 const char *target_string; 2757 struct gl_shader_compiler_options *options = 2758 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)]; 2759 2760 switch (shader->Type) { 2761 case GL_VERTEX_SHADER: 2762 target = GL_VERTEX_PROGRAM_ARB; 2763 target_string = "vertex"; 2764 break; 2765 case GL_FRAGMENT_SHADER: 2766 target = GL_FRAGMENT_PROGRAM_ARB; 2767 target_string = "fragment"; 2768 break; 2769 case GL_GEOMETRY_SHADER: 2770 target = GL_GEOMETRY_PROGRAM_NV; 2771 target_string = "geometry"; 2772 break; 2773 default: 2774 assert(!"should not be reached"); 2775 return NULL; 2776 } 2777 2778 validate_ir_tree(shader->ir); 2779 2780 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name); 2781 if (!prog) 2782 return NULL; 2783 prog->Parameters = _mesa_new_parameter_list(); 2784 v.ctx = ctx; 2785 v.prog = prog; 2786 v.shader_program = shader_program; 2787 v.options = options; 2788 2789 _mesa_generate_parameters_list_for_uniforms(shader_program, shader, 2790 prog->Parameters); 2791 2792 /* Emit Mesa IR for main(). */ 2793 visit_exec_list(shader->ir, &v); 2794 v.emit(NULL, OPCODE_END); 2795 2796 prog->NumTemporaries = v.next_temp; 2797 2798 int num_instructions = 0; 2799 foreach_iter(exec_list_iterator, iter, v.instructions) { 2800 num_instructions++; 2801 } 2802 2803 mesa_instructions = 2804 (struct prog_instruction *)calloc(num_instructions, 2805 sizeof(*mesa_instructions)); 2806 mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *, 2807 num_instructions); 2808 2809 v.copy_propagate(); 2810 2811 /* Convert ir_mesa_instructions into prog_instructions. 
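 * This is a one-to-one walk over v.instructions that fills in the flat array
 * allocated above; mesa_instruction_annotation[] keeps the originating GLSL
 * IR alongside each instruction so print_program() can interleave the two.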
2812 */ 2813 mesa_inst = mesa_instructions; 2814 i = 0; 2815 foreach_iter(exec_list_iterator, iter, v.instructions) { 2816 const ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get(); 2817 2818 mesa_inst->Opcode = inst->op; 2819 mesa_inst->CondUpdate = inst->cond_update; 2820 if (inst->saturate) 2821 mesa_inst->SaturateMode = SATURATE_ZERO_ONE; 2822 mesa_inst->DstReg.File = inst->dst.file; 2823 mesa_inst->DstReg.Index = inst->dst.index; 2824 mesa_inst->DstReg.CondMask = inst->dst.cond_mask; 2825 mesa_inst->DstReg.WriteMask = inst->dst.writemask; 2826 mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; 2827 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]); 2828 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]); 2829 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]); 2830 mesa_inst->TexSrcUnit = inst->sampler; 2831 mesa_inst->TexSrcTarget = inst->tex_target; 2832 mesa_inst->TexShadow = inst->tex_shadow; 2833 mesa_instruction_annotation[i] = inst->ir; 2834 2835 /* Set IndirectRegisterFiles. */ 2836 if (mesa_inst->DstReg.RelAddr) 2837 prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; 2838 2839 /* Update program's bitmask of indirectly accessed register files */ 2840 for (unsigned src = 0; src < 3; src++) 2841 if (mesa_inst->SrcReg[src].RelAddr) 2842 prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; 2843 2844 switch (mesa_inst->Opcode) { 2845 case OPCODE_IF: 2846 if (options->MaxIfDepth == 0) { 2847 linker_warning(shader_program, 2848 "Couldn't flatten if-statement. " 2849 "This will likely result in software " 2850 "rasterization.\n"); 2851 } 2852 break; 2853 case OPCODE_BGNLOOP: 2854 if (options->EmitNoLoops) { 2855 linker_warning(shader_program, 2856 "Couldn't unroll loop. " 2857 "This will likely result in software " 2858 "rasterization.\n"); 2859 } 2860 break; 2861 case OPCODE_CONT: 2862 if (options->EmitNoCont) { 2863 linker_warning(shader_program, 2864 "Couldn't lower continue-statement. " 2865 "This will likely result in software " 2866 "rasterization.\n"); 2867 } 2868 break; 2869 case OPCODE_ARL: 2870 prog->NumAddressRegs = 1; 2871 break; 2872 default: 2873 break; 2874 } 2875 2876 mesa_inst++; 2877 i++; 2878 2879 if (!shader_program->LinkStatus) 2880 break; 2881 } 2882 2883 if (!shader_program->LinkStatus) { 2884 goto fail_exit; 2885 } 2886 2887 set_branchtargets(&v, mesa_instructions, num_instructions); 2888 2889 if (ctx->Shader.Flags & GLSL_DUMP) { 2890 printf("\n"); 2891 printf("GLSL IR for linked %s program %d:\n", target_string, 2892 shader_program->Name); 2893 _mesa_print_ir(shader->ir, NULL); 2894 printf("\n"); 2895 printf("\n"); 2896 printf("Mesa IR for linked %s program %d:\n", target_string, 2897 shader_program->Name); 2898 print_program(mesa_instructions, mesa_instruction_annotation, 2899 num_instructions); 2900 } 2901 2902 prog->Instructions = mesa_instructions; 2903 prog->NumInstructions = num_instructions; 2904 2905 /* Setting this to NULL prevents a possible double free in the fail_exit 2906 * path (far below). 2907 */ 2908 mesa_instructions = NULL; 2909 2910 do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER); 2911 2912 prog->SamplersUsed = shader->active_samplers; 2913 prog->ShadowSamplers = shader->shadow_samplers; 2914 _mesa_update_shader_textures_used(shader_program, prog); 2915 2916 /* Set the gl_FragDepth layout. 
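 * (i.e. copy the conservative-depth layout qualifier, if any, that the
 * linker recorded on the shader program over to the generated fragment
 * program).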
*/ 2917 if (target == GL_FRAGMENT_PROGRAM_ARB) { 2918 struct gl_fragment_program *fp = (struct gl_fragment_program *)prog; 2919 fp->FragDepthLayout = shader_program->FragDepthLayout; 2920 } 2921 2922 _mesa_reference_program(ctx, &shader->Program, prog); 2923 2924 if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) { 2925 _mesa_optimize_program(ctx, prog); 2926 } 2927 2928 /* This has to be done last. Any operation that can cause 2929 * prog->ParameterValues to get reallocated (e.g., anything that adds a 2930 * program constant) has to happen before creating this linkage. 2931 */ 2932 _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters); 2933 if (!shader_program->LinkStatus) { 2934 goto fail_exit; 2935 } 2936 2937 return prog; 2938 2939 fail_exit: 2940 free(mesa_instructions); 2941 _mesa_reference_program(ctx, &shader->Program, NULL); 2942 return NULL; 2943 } 2944 2945 extern "C" { 2946 2947 /** 2948 * Link a shader. 2949 * Called via ctx->Driver.LinkShader() 2950 * This actually involves converting GLSL IR into Mesa gl_programs with 2951 * code lowering and other optimizations. 2952 */ 2953 GLboolean 2954 _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 2955 { 2956 assert(prog->LinkStatus); 2957 2958 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 2959 if (prog->_LinkedShaders[i] == NULL) 2960 continue; 2961 2962 bool progress; 2963 exec_list *ir = prog->_LinkedShaders[i]->ir; 2964 const struct gl_shader_compiler_options *options = 2965 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)]; 2966 2967 do { 2968 progress = false; 2969 2970 /* Lowering */ 2971 do_mat_op_to_vec(ir); 2972 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2 2973 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP 2974 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 2975 2976 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress; 2977 2978 progress = do_common_optimization(ir, true, true, 2979 options->MaxUnrollIterations) 2980 || progress; 2981 2982 progress = lower_quadop_vector(ir, true) || progress; 2983 2984 if (options->MaxIfDepth == 0) 2985 progress = lower_discard(ir) || progress; 2986 2987 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress; 2988 2989 if (options->EmitNoNoise) 2990 progress = lower_noise(ir) || progress; 2991 2992 /* If there are forms of indirect addressing that the driver 2993 * cannot handle, perform the lowering pass. 
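 * lower_variable_index_to_cond_assign() rewrites each such access into a
 * sequence of conditional assignments that compare the index against the
 * possible element values (roughly: if (i == 0) use element 0, if (i == 1)
 * use element 1, and so on), at the cost of extra instructions.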
2994 */ 2995 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 2996 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 2997 progress = 2998 lower_variable_index_to_cond_assign(ir, 2999 options->EmitNoIndirectInput, 3000 options->EmitNoIndirectOutput, 3001 options->EmitNoIndirectTemp, 3002 options->EmitNoIndirectUniform) 3003 || progress; 3004 3005 progress = do_vec_index_to_cond_assign(ir) || progress; 3006 } while (progress); 3007 3008 validate_ir_tree(ir); 3009 } 3010 3011 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) { 3012 struct gl_program *linked_prog; 3013 3014 if (prog->_LinkedShaders[i] == NULL) 3015 continue; 3016 3017 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 3018 3019 if (linked_prog) { 3020 static const GLenum targets[] = { 3021 GL_VERTEX_PROGRAM_ARB, 3022 GL_FRAGMENT_PROGRAM_ARB, 3023 GL_GEOMETRY_PROGRAM_NV 3024 }; 3025 3026 if (i == MESA_SHADER_VERTEX) { 3027 ((struct gl_vertex_program *)linked_prog)->UsesClipDistance 3028 = prog->Vert.UsesClipDistance; 3029 } 3030 3031 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 3032 linked_prog); 3033 if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) { 3034 return GL_FALSE; 3035 } 3036 } 3037 3038 _mesa_reference_program(ctx, &linked_prog, NULL); 3039 } 3040 3041 return prog->LinkStatus; 3042 } 3043 3044 3045 /** 3046 * Compile a GLSL shader. Called via glCompileShader(). 3047 */ 3048 void 3049 _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader) 3050 { 3051 struct _mesa_glsl_parse_state *state = 3052 new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader); 3053 3054 const char *source = shader->Source; 3055 /* Check if the user called glCompileShader without first calling 3056 * glShaderSource. This should fail to compile, but not raise a GL_ERROR. 
3057 */ 3058 if (source == NULL) { 3059 shader->CompileStatus = GL_FALSE; 3060 return; 3061 } 3062 3063 state->error = glcpp_preprocess(state, &source, &state->info_log, 3064 &ctx->Extensions, ctx->API); 3065 3066 if (ctx->Shader.Flags & GLSL_DUMP) { 3067 printf("GLSL source for %s shader %d:\n", 3068 _mesa_glsl_shader_target_name(state->target), shader->Name); 3069 printf("%s\n", shader->Source); 3070 } 3071 3072 if (!state->error) { 3073 _mesa_glsl_lexer_ctor(state, source); 3074 _mesa_glsl_parse(state); 3075 _mesa_glsl_lexer_dtor(state); 3076 } 3077 3078 ralloc_free(shader->ir); 3079 shader->ir = new(shader) exec_list; 3080 if (!state->error && !state->translation_unit.is_empty()) 3081 _mesa_ast_to_hir(shader->ir, state); 3082 3083 if (!state->error && !shader->ir->is_empty()) { 3084 validate_ir_tree(shader->ir); 3085 3086 /* Do some optimization at compile time to reduce shader IR size 3087 * and reduce later work if the same shader is linked multiple times 3088 */ 3089 while (do_common_optimization(shader->ir, false, false, 32)) 3090 ; 3091 3092 validate_ir_tree(shader->ir); 3093 } 3094 3095 shader->symbols = state->symbols; 3096 3097 shader->CompileStatus = !state->error; 3098 shader->InfoLog = state->info_log; 3099 shader->Version = state->language_version; 3100 memcpy(shader->builtins_to_link, state->builtins_to_link, 3101 sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link); 3102 shader->num_builtins_to_link = state->num_builtins_to_link; 3103 3104 if (ctx->Shader.Flags & GLSL_LOG) { 3105 _mesa_write_shader_to_file(shader); 3106 } 3107 3108 if (ctx->Shader.Flags & GLSL_DUMP) { 3109 if (shader->CompileStatus) { 3110 printf("GLSL IR for shader %d:\n", shader->Name); 3111 _mesa_print_ir(shader->ir, NULL); 3112 printf("\n\n"); 3113 } else { 3114 printf("GLSL shader %d failed to compile.\n", shader->Name); 3115 } 3116 if (shader->InfoLog && shader->InfoLog[0] != 0) { 3117 printf("GLSL shader %d info log:\n", shader->Name); 3118 printf("%s\n", shader->InfoLog); 3119 } 3120 } 3121 3122 if (shader->UniformBlocks) 3123 ralloc_free(shader->UniformBlocks); 3124 shader->NumUniformBlocks = state->num_uniform_blocks; 3125 shader->UniformBlocks = state->uniform_blocks; 3126 ralloc_steal(shader, shader->UniformBlocks); 3127 3128 /* Retain any live IR, but trash the rest. */ 3129 reparent_ir(shader->ir, shader->ir); 3130 3131 ralloc_free(state); 3132 } 3133 3134 3135 /** 3136 * Link a GLSL shader program. Called via glLinkProgram(). 3137 */ 3138 void 3139 _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 3140 { 3141 unsigned int i; 3142 3143 _mesa_clear_shader_program_data(ctx, prog); 3144 3145 prog->LinkStatus = GL_TRUE; 3146 3147 for (i = 0; i < prog->NumShaders; i++) { 3148 if (!prog->Shaders[i]->CompileStatus) { 3149 linker_error(prog, "linking with uncompiled shader"); 3150 prog->LinkStatus = GL_FALSE; 3151 } 3152 } 3153 3154 if (prog->LinkStatus) { 3155 link_shaders(ctx, prog); 3156 } 3157 3158 if (prog->LinkStatus) { 3159 if (!ctx->Driver.LinkShader(ctx, prog)) { 3160 prog->LinkStatus = GL_FALSE; 3161 } 3162 } 3163 3164 if (ctx->Shader.Flags & GLSL_DUMP) { 3165 if (!prog->LinkStatus) { 3166 printf("GLSL shader program %d failed to link\n", prog->Name); 3167 } 3168 3169 if (prog->InfoLog && prog->InfoLog[0] != 0) { 3170 printf("GLSL shader program %d info log:\n", prog->Name); 3171 printf("%s\n", prog->InfoLog); 3172 } 3173 } 3174 } 3175 3176 } /* extern "C" */ 3177