/*
 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
 * Copyright 2010 Intel Corporation
 * Copyright 2011 Bryan Cain
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file glsl_to_tgsi.cpp
 *
 * Translate GLSL IR to TGSI.
 */

#include <stdio.h>
#include "main/compiler.h"
#include "ir.h"
#include "ir_visitor.h"
#include "ir_print_visitor.h"
#include "ir_expression_flattening.h"
#include "glsl_types.h"
#include "glsl_parser_extras.h"
#include "../glsl/program.h"
#include "ir_optimization.h"
#include "ast.h"

#include "main/mtypes.h"
#include "main/shaderobj.h"
#include "program/hash_table.h"

extern "C" {
#include "main/shaderapi.h"
#include "main/uniforms.h"
#include "program/prog_instruction.h"
#include "program/prog_optimize.h"
#include "program/prog_print.h"
#include "program/program.h"
#include "program/prog_parameter.h"
#include "program/sampler.h"

#include "pipe/p_compiler.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_state.h"
#include "util/u_math.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_info.h"
#include "st_context.h"
#include "st_program.h"
#include "st_glsl_to_tgsi.h"
#include "st_mesa_to_tgsi.h"
}

#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
                           (1 << PROGRAM_ENV_PARAM) |    \
                           (1 << PROGRAM_STATE_VAR) |    \
                           (1 << PROGRAM_NAMED_PARAM) |  \
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

/**
 * Maximum number of temporary registers.
 *
 * It is too big for stack allocated arrays -- it will cause stack overflow on
 * Windows and likely Mac OS X.
 */
#define MAX_TEMPS 4096

/* will be 4 for GLSL 4.00 */
#define MAX_GLSL_TEXTURE_OFFSET 1

class st_src_reg;
class st_dst_reg;

static int swizzle_for_size(int size);

/**
 * This struct is a corresponding struct to TGSI ureg_src.
 */
class st_src_reg {
public:
   st_src_reg(gl_register_file file, int index, const glsl_type *type)
   {
      this->file = file;
      this->index = index;
      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
         this->swizzle = swizzle_for_size(type->vector_elements);
      else
         this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
      this->reladdr = NULL;
   }

   st_src_reg(gl_register_file file, int index, int type)
   {
      this->type = type;
      this->file = file;
      this->index = index;
      this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->reladdr = NULL;
   }

   st_src_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->swizzle = 0;
      this->negate = 0;
      this->reladdr = NULL;
   }

   explicit st_src_reg(st_dst_reg reg);

   gl_register_file file; /**< PROGRAM_* from Mesa */
   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
   int negate; /**< NEGATE_XYZW mask from mesa */
   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
};

class st_dst_reg {
public:
   st_dst_reg(gl_register_file file, int writemask, int type)
   {
      this->file = file;
      this->index = 0;
      this->writemask = writemask;
      this->cond_mask = COND_TR;
      this->reladdr = NULL;
      this->type = type;
   }

   st_dst_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->writemask = 0;
      this->cond_mask = COND_TR;
      this->reladdr = NULL;
   }

   explicit st_dst_reg(st_src_reg reg);

   gl_register_file file; /**< PROGRAM_* from Mesa */
   int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
   GLuint cond_mask:4;
   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
};

st_src_reg::st_src_reg(st_dst_reg reg)
{
   this->type = reg.type;
   this->file = reg.file;
   this->index = reg.index;
   this->swizzle = SWIZZLE_XYZW;
   this->negate = 0;
   this->reladdr = reg.reladdr;
}

st_dst_reg::st_dst_reg(st_src_reg reg)
{
   this->type = reg.type;
   this->file = reg.file;
   this->index = reg.index;
   this->writemask = WRITEMASK_XYZW;
   this->cond_mask = COND_TR;
   this->reladdr = reg.reladdr;
}

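/* Example (for illustration): a register is described by a Mesa register
 * file, an index within that file, and a GLSL base type, e.g.
 *
 *    st_src_reg src(PROGRAM_TEMPORARY, 0, glsl_type::vec2_type);
 *       // src.swizzle == swizzle_for_size(2), so .z/.w reads replicate .y
 *    st_dst_reg dst(PROGRAM_OUTPUT, WRITEMASK_XYZW, GLSL_TYPE_FLOAT);
 *
 * The explicit converting constructors above allow the same register to be
 * reused as either a source or a destination operand.
 */
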
class glsl_to_tgsi_instruction : public exec_node {
public:
   /* Callers of this ralloc-based new need not call delete. It's
    * easier to just ralloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = rzalloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   unsigned op;
   st_dst_reg dst;
   st_src_reg src[3];
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   GLboolean cond_update;
   bool saturate;
   int sampler; /**< sampler index */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   GLboolean tex_shadow;
   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
   unsigned tex_offset_num_offset;
   int dead_mask; /**< Used in dead code elimination */

   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
};

class variable_storage : public exec_node {
public:
   variable_storage(ir_variable *var, gl_register_file file, int index)
      : file(file), index(index), var(var)
   {
      /* empty */
   }

   gl_register_file file;
   int index;
   ir_variable *var; /* variable that maps to this, if any */
};

class immediate_storage : public exec_node {
public:
   immediate_storage(gl_constant_value *values, int size, int type)
   {
      memcpy(this->values, values, size * sizeof(gl_constant_value));
      this->size = size;
      this->type = type;
   }

   gl_constant_value values[4];
   int size; /**< Number of components (1-4) */
   int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
};

class function_entry : public exec_node {
public:
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that TGSI instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body. So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   glsl_to_tgsi_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual TGSI.
    *
    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
    */
   int inst;

   /** Storage for the return value. */
   st_src_reg return_reg;
};

class glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_shader_compiler_options *options;

   int next_temp;

   int num_address_regs;
   int samplers_used;
   bool indirect_addr_temps;
   bool indirect_addr_consts;

   int glsl_version;
   bool native_integers;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[4],
                    int size, int datatype, GLuint *swizzle_out);

   function_entry *get_function_signature(ir_function_signature *sig);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_type(int type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction. Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   /*@}*/

   st_src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);

   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst, st_src_reg src0);

   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst,
                                  st_src_reg src0, st_src_reg src1, st_src_reg src2);

   unsigned get_opcode(ir_instruction *ir, unsigned op,
                       st_dst_reg dst,
                       st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void emit_scs(ir_instruction *ir, unsigned op,
                 st_dst_reg dst, const st_src_reg &src);

   bool try_emit_mad(ir_expression *ir,
                     int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
                                 int mul_operand);
   bool try_emit_sat(ir_expression *ir);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   void rename_temp_register(int index, int new_index);
   int get_first_temp_read(int index);
   int get_first_temp_write(int index);
   int get_last_temp_read(int index);
   int get_last_temp_write(int index);

   void copy_propagate(void);
   void eliminate_dead_code(void);
   int eliminate_dead_code_advanced(void);
   void merge_registers(void);
   void renumber_registers(void);

   void *mem_ctx;
};

static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);

static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);

static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);

static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);

static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
   va_end(args);

   prog->LinkStatus = GL_FALSE;
}

static int
swizzle_for_size(int size)
{
   int size_swizzles[4] = {
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
   };

   assert((size >= 1) && (size <= 4));
   return size_swizzles[size - 1];
}

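/* For illustration: swizzle_for_size() packs a component count into a Mesa
 * swizzle that replicates the last used channel, e.g.
 *
 *    swizzle_for_size(2) == MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y)
 *    swizzle_for_size(4) == SWIZZLE_XYZW
 *
 * so reads of a vec2 never reference undefined .z/.w channels.
 */
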
static bool
is_tex_instruction(unsigned opcode)
{
   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
   return info->is_tex;
}

static unsigned
num_inst_dst_regs(unsigned opcode)
{
   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
   return info->num_dst;
}

static unsigned
num_inst_src_regs(unsigned opcode)
{
   const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
   return info->is_tex ? info->num_src - 1 : info->num_src;
}

glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
                           st_dst_reg dst,
                           st_src_reg src0, st_src_reg src1, st_src_reg src2)
{
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
   int num_reladdr = 0, i;

   op = get_opcode(ir, op, dst, src0, src1);

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL;
   num_reladdr += src0.reladdr != NULL;
   num_reladdr += src1.reladdr != NULL;
   num_reladdr += src2.reladdr != NULL;

   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr) {
      emit_arl(ir, address_reg, *dst.reladdr);
      num_reladdr--;
   }
   assert(num_reladdr == 0);

   inst->op = op;
   inst->dst = dst;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->ir = ir;
   inst->dead_mask = 0;

   inst->function = NULL;

   if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
      this->num_address_regs = 1;

   /* Update indirect addressing status used by TGSI */
   if (dst.reladdr) {
      switch(dst.file) {
      case PROGRAM_TEMPORARY:
         this->indirect_addr_temps = true;
         break;
      case PROGRAM_LOCAL_PARAM:
      case PROGRAM_ENV_PARAM:
      case PROGRAM_STATE_VAR:
      case PROGRAM_NAMED_PARAM:
      case PROGRAM_CONSTANT:
      case PROGRAM_UNIFORM:
         this->indirect_addr_consts = true;
         break;
      case PROGRAM_IMMEDIATE:
         assert(!"immediates should not have indirect addressing");
         break;
      default:
         break;
      }
   }
   else {
      for (i=0; i<3; i++) {
         if(inst->src[i].reladdr) {
            switch(inst->src[i].file) {
            case PROGRAM_TEMPORARY:
               this->indirect_addr_temps = true;
               break;
            case PROGRAM_LOCAL_PARAM:
            case PROGRAM_ENV_PARAM:
            case PROGRAM_STATE_VAR:
            case PROGRAM_NAMED_PARAM:
            case PROGRAM_CONSTANT:
            case PROGRAM_UNIFORM:
               this->indirect_addr_consts = true;
               break;
            case PROGRAM_IMMEDIATE:
               assert(!"immediates should not have indirect addressing");
               break;
            default:
               break;
            }
         }
      }
   }

   this->instructions.push_tail(inst);

   if (native_integers)
      try_emit_float_set(ir, op, dst);

   return inst;
}


glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
                           st_dst_reg dst, st_src_reg src0, st_src_reg src1)
{
   return emit(ir, op, dst, src0, src1, undef_src);
}

glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
                           st_dst_reg dst, st_src_reg src0)
{
   assert(dst.writemask != 0);
   return emit(ir, op, dst, src0, undef_src, undef_src);
}

glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
{
   return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
}

/**
 * Emits the code to convert the result of float SET instructions to integers.
 */
void
glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
                                         st_dst_reg dst)
{
   if ((op == TGSI_OPCODE_SEQ ||
        op == TGSI_OPCODE_SNE ||
        op == TGSI_OPCODE_SGE ||
        op == TGSI_OPCODE_SLT))
   {
      st_src_reg src = st_src_reg(dst);
      src.negate = ~src.negate;
      dst.type = GLSL_TYPE_FLOAT;
      emit(ir, TGSI_OPCODE_F2I, dst, src);
   }
}

/**
 * Determines whether to use an integer, unsigned integer, or float opcode
 * based on the operands and input opcode, then emits the result.
 */
unsigned
glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
                                 st_dst_reg dst,
                                 st_src_reg src0, st_src_reg src1)
{
   int type = GLSL_TYPE_FLOAT;

   assert(src0.type != GLSL_TYPE_ARRAY);
   assert(src0.type != GLSL_TYPE_STRUCT);
   assert(src1.type != GLSL_TYPE_ARRAY);
   assert(src1.type != GLSL_TYPE_STRUCT);

   if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
      type = GLSL_TYPE_FLOAT;
   else if (native_integers)
      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;

#define case4(c, f, i, u) \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
      else op = TGSI_OPCODE_##f; \
      break;
#define case3(f, i, u)  case4(f, f, i, u)
#define case2fi(f, i)   case4(f, f, i, i)
#define case2iu(i, u)   case4(i, LAST, i, u)

   switch(op) {
      case2fi(ADD, UADD);
      case2fi(MUL, UMUL);
      case2fi(MAD, UMAD);
      case3(DIV, IDIV, UDIV);
      case3(MAX, IMAX, UMAX);
      case3(MIN, IMIN, UMIN);
      case2iu(MOD, UMOD);

      case2fi(SEQ, USEQ);
      case2fi(SNE, USNE);
      case3(SGE, ISGE, USGE);
      case3(SLT, ISLT, USLT);

      case2iu(ISHR, USHR);

      case2fi(SSG, ISSG);
      case3(ABS, IABS, IABS);

      default: break;
   }

   assert(op != TGSI_OPCODE_LAST);
   return op;
}

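/* For illustration, with native integers the case tables above pick e.g.:
 *
 *    get_opcode(ir, TGSI_OPCODE_ADD, dst, int_src,  int_src)  -> TGSI_OPCODE_UADD
 *    get_opcode(ir, TGSI_OPCODE_DIV, dst, uint_src, uint_src) -> TGSI_OPCODE_UDIV
 *    get_opcode(ir, TGSI_OPCODE_SLT, dst, float_src, int_src) -> TGSI_OPCODE_SLT
 *
 * Any float operand forces the float opcode, and booleans are treated as
 * signed integers.
 */
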
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
                              st_dst_reg dst, st_src_reg src0, st_src_reg src1,
                              unsigned elements)
{
   static const unsigned dot_opcodes[] = {
      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
   };

   return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
}

/**
 * Emits TGSI scalar opcodes to produce unique answers across channels.
 *
 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X
 * channel determines the result across all channels. So to do a vec4
 * of this operation, we want to emit a scalar per source channel used
 * to produce dest channels.
 */
void
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst,
                                  st_src_reg orig_src0, st_src_reg orig_src1)
{
   int i, j;
   int done_mask = ~dst.writemask;

   /* TGSI RCP is a scalar operation splatting results to all channels,
    * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
    * dst channels.
    */
   for (i = 0; i < 4; i++) {
      GLuint this_mask = (1 << i);
      glsl_to_tgsi_instruction *inst;
      st_src_reg src0 = orig_src0;
      st_src_reg src1 = orig_src1;

      if (done_mask & this_mask)
         continue;

      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
      for (j = i + 1; j < 4; j++) {
         /* If there is another enabled component in the destination that is
          * derived from the same inputs, generate its value on this pass as
          * well.
          */
         if (!(done_mask & (1 << j)) &&
             GET_SWZ(src0.swizzle, j) == src0_swiz &&
             GET_SWZ(src1.swizzle, j) == src1_swiz) {
            this_mask |= (1 << j);
         }
      }
      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
                                   src0_swiz, src0_swiz);
      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
                                   src1_swiz, src1_swiz);

      inst = emit(ir, op, dst, src0, src1);
      inst->dst.writemask = this_mask;
      done_mask |= this_mask;
   }
}

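/* For illustration: a reciprocal of a vec2 reaches the loop above (through
 * the single-source wrapper below) and is expanded into one scalar
 * instruction per destination channel, each splatting one source channel:
 *
 *    RCP dst.x, src.xxxx
 *    RCP dst.y, src.yyyy
 *
 * Channels that read the same source swizzle are folded into a single
 * instruction via this_mask.
 */
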
void
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst, st_src_reg src0)
{
   st_src_reg undef = undef_src;

   undef.swizzle = SWIZZLE_XXXX;

   emit_scalar(ir, op, dst, src0, undef);
}

void
glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
                               st_dst_reg dst, st_src_reg src0)
{
   int op = TGSI_OPCODE_ARL;

   if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
      op = TGSI_OPCODE_UARL;

   emit(NULL, op, dst, src0);
}

/**
 * Emit a TGSI_OPCODE_SCS instruction
 *
 * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
 * Instead of splatting its result across all four components of the
 * destination, it writes one value to the \c x component and another value to
 * the \c y component.
 *
 * \param ir IR instruction being processed
 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
 *           on which value is desired.
 * \param dst Destination register
 * \param src Source register
 */
void
glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
                               st_dst_reg dst,
                               const st_src_reg &src)
{
   /* Vertex programs cannot use the SCS opcode.
    */
   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
      emit_scalar(ir, op, dst, src);
      return;
   }

   const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
   const unsigned scs_mask = (1U << component);
   int done_mask = ~dst.writemask;
   st_src_reg tmp;

   assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);

   /* If there are components in the destination that differ from the component
    * that will be written by the SCS instruction, we'll need a temporary.
    */
   if (scs_mask != unsigned(dst.writemask)) {
      tmp = get_temp(glsl_type::vec4_type);
   }

   for (unsigned i = 0; i < 4; i++) {
      unsigned this_mask = (1U << i);
      st_src_reg src0 = src;

      if ((done_mask & this_mask) != 0)
         continue;

      /* The source swizzle specifies which component of the source generates
       * sine / cosine for the current component in the destination. The SCS
       * instruction requires that this value be swizzled into the X component.
       * Replace the current swizzle with a swizzle that puts the source in
       * the X component.
       */
      unsigned src0_swiz = GET_SWZ(src.swizzle, i);

      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
                                   src0_swiz, src0_swiz);
      for (unsigned j = i + 1; j < 4; j++) {
         /* If there is another enabled component in the destination that is
          * derived from the same inputs, generate its value on this pass as
          * well.
          */
         if (!(done_mask & (1 << j)) &&
             GET_SWZ(src0.swizzle, j) == src0_swiz) {
            this_mask |= (1 << j);
         }
      }

      if (this_mask != scs_mask) {
         glsl_to_tgsi_instruction *inst;
         st_dst_reg tmp_dst = st_dst_reg(tmp);

         /* Emit the SCS instruction.
          */
         inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
         inst->dst.writemask = scs_mask;

         /* Move the result of the SCS instruction to the desired location in
          * the destination.
          */
         tmp.swizzle = MAKE_SWIZZLE4(component, component,
                                     component, component);
         inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
         inst->dst.writemask = this_mask;
      } else {
         /* Emit the SCS instruction to write directly to the destination.
          */
         glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
         inst->dst.writemask = scs_mask;
      }

      done_mask |= this_mask;
   }
}

int
glsl_to_tgsi_visitor::add_constant(gl_register_file file,
                                   gl_constant_value values[4], int size, int datatype,
                                   GLuint *swizzle_out)
{
   if (file == PROGRAM_CONSTANT) {
      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
                                              size, datatype, swizzle_out);
   } else {
      int index = 0;
      immediate_storage *entry;
      assert(file == PROGRAM_IMMEDIATE);

      /* Search immediate storage to see if we already have an identical
       * immediate that we can use instead of adding a duplicate entry.
       */
      foreach_iter(exec_list_iterator, iter, this->immediates) {
         entry = (immediate_storage *)iter.get();

         if (entry->size == size &&
             entry->type == datatype &&
             !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
            return index;
         }
         index++;
      }

      /* Add this immediate to the list. */
      entry = new(mem_ctx) immediate_storage(values, size, datatype);
      this->immediates.push_tail(entry);
      this->num_immediates++;
      return index;
   }
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
   union gl_constant_value uval;

   uval.f = val;
   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);

   return src;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
   union gl_constant_value uval;

   assert(native_integers);

   uval.i = val;
   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);

   return src;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
{
   if (native_integers)
      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
                                       st_src_reg_for_int(val);
   else
      return st_src_reg_for_float(val);
}

static int
type_size(const struct glsl_type *type)
{
   unsigned int i;
   int size;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      if (type->is_matrix()) {
         return type->matrix_columns;
      } else {
         /* Regardless of size of vector, it gets a vec4. This is bad
          * packing for things like floats, but otherwise arrays become a
          * mess. Hopefully a later pass over the code can pack scalars
          * down if appropriate.
          */
         return 1;
      }
   case GLSL_TYPE_ARRAY:
      assert(type->length > 0);
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up one slot in UNIFORMS[], but they're baked in
       * at link time.
       */
      return 1;
   default:
      assert(0);
      return 0;
   }
}

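/* For illustration, type_size() counts vec4-sized slots, e.g.:
 *
 *    float / vec3 / vec4          -> 1
 *    mat3                         -> 3   (one slot per column)
 *    vec2[8]                      -> 8
 *    struct { vec3 a; float b; }  -> 2
 *
 * so scalars and small vectors still occupy a whole register.
 */
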
/**
 * In the initial pass of codegen, we assign temporary numbers to
 * intermediate results. (not SSA -- variable assignments will reuse
 * storage).
 */
st_src_reg
glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
{
   st_src_reg src;

   src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
   src.file = PROGRAM_TEMPORARY;
   src.index = next_temp;
   src.reladdr = NULL;
   next_temp += type_size(type);

   if (type->is_array() || type->is_record()) {
      src.swizzle = SWIZZLE_NOOP;
   } else {
      src.swizzle = swizzle_for_size(type->vector_elements);
   }
   src.negate = 0;

   return src;
}

variable_storage *
glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
{

   variable_storage *entry;

   foreach_iter(exec_list_iterator, iter, this->variables) {
      entry = (variable_storage *)iter.get();

      if (entry->var == var)
         return entry;
   }

   return NULL;
}

void
glsl_to_tgsi_visitor::visit(ir_variable *ir)
{
   if (strcmp(ir->name, "gl_FragCoord") == 0) {
      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;

      fp->OriginUpperLeft = ir->origin_upper_left;
      fp->PixelCenterInteger = ir->pixel_center_integer;
   }

   if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
      unsigned int i;
      const ir_state_slot *const slots = ir->state_slots;
      assert(ir->state_slots != NULL);

      /* Check if this statevar's setup in the STATE file exactly
       * matches how we'll want to reference it as a
       * struct/array/whatever. If not, then we need to move it into
       * temporary storage and hope that it'll get copy-propagated
       * out.
       */
      for (i = 0; i < ir->num_state_slots; i++) {
         if (slots[i].swizzle != SWIZZLE_XYZW) {
            break;
         }
      }

      variable_storage *storage;
      st_dst_reg dst;
      if (i == ir->num_state_slots) {
         /* We'll set the index later. */
         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
         this->variables.push_tail(storage);

         dst = undef_dst;
      } else {
         /* The variable_storage constructor allocates slots based on the size
          * of the type. However, this had better match the number of state
          * elements that we're going to copy into the new temporary.
          */
         assert((int) ir->num_state_slots == type_size(ir->type));

         storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
                                                 this->next_temp);
         this->variables.push_tail(storage);
         this->next_temp += type_size(ir->type);

         dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
               native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
      }


      for (unsigned int i = 0; i < ir->num_state_slots; i++) {
         int index = _mesa_add_state_reference(this->prog->Parameters,
                                               (gl_state_index *)slots[i].tokens);

         if (storage->file == PROGRAM_STATE_VAR) {
            if (storage->index == -1) {
               storage->index = index;
            } else {
               assert(index == storage->index + (int)i);
            }
         } else {
            /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
             * the data being moved since MOV does not care about the type of
             * data it is moving, and we don't want to declare registers with
             * array or struct types.
             */
            st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
            src.swizzle = slots[i].swizzle;
            emit(ir, TGSI_OPCODE_MOV, dst, src);
            /* even a float takes up a whole vec4 reg in a struct/array. */
            dst.index++;
         }
      }

      if (storage->file == PROGRAM_TEMPORARY &&
          dst.index != storage->index + (int) ir->num_state_slots) {
         fail_link(this->shader_program,
                   "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
                   ir->name, dst.index - storage->index,
                   type_size(ir->type));
      }
   }
}

void
glsl_to_tgsi_visitor::visit(ir_loop *ir)
{
   ir_dereference_variable *counter = NULL;

   if (ir->counter != NULL)
      counter = new(ir) ir_dereference_variable(ir->counter);

   if (ir->from != NULL) {
      assert(ir->counter != NULL);

      ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);

      a->accept(this);
      delete a;
   }

   emit(NULL, TGSI_OPCODE_BGNLOOP);

   if (ir->to) {
      ir_expression *e =
         new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
                               counter, ir->to);
      ir_if *if_stmt = new(ir) ir_if(e);

      ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);

      if_stmt->then_instructions.push_tail(brk);

      if_stmt->accept(this);

      delete if_stmt;
      delete e;
      delete brk;
   }

   visit_exec_list(&ir->body_instructions, this);

   if (ir->increment) {
      ir_expression *e =
         new(ir) ir_expression(ir_binop_add, counter->type,
                               counter, ir->increment);

      ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);

      a->accept(this);
      delete a;
      delete e;
   }

   emit(NULL, TGSI_OPCODE_ENDLOOP);
}

void
glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(NULL, TGSI_OPCODE_BRK);
      break;
   case ir_loop_jump::jump_continue:
      emit(NULL, TGSI_OPCODE_CONT);
      break;
   }
}


void
glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
{
   assert(0);
   (void)ir;
}

void
glsl_to_tgsi_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to glsl_to_tgsi.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();

         ir->accept(this);
      }
   }
}

bool
glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
{
   int nonmul_operand = 1 - mul_operand;
   st_src_reg a, b, c;
   st_dst_reg result_dst;

   ir_expression *expr = ir->operands[mul_operand]->as_expression();
   if (!expr || expr->operation != ir_binop_mul)
      return false;

   expr->operands[0]->accept(this);
   a = this->result;
   expr->operands[1]->accept(this);
   b = this->result;
   ir->operands[nonmul_operand]->accept(this);
   c = this->result;

   this->result = get_temp(ir->type);
   result_dst = st_dst_reg(this->result);
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
   emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);

   return true;
}

/**
 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
 *
 * The logic values are 1.0 for true and 0.0 for false. Logical-and is
 * implemented using multiplication, and logical-or is implemented using
 * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
 * As a result, the logical expression (a & !b) can be rewritten as:
 *
 *  - a * !b
 *  - a * (1 - b)
 *  - (a * 1) - (a * b)
 *  - a + -(a * b)
 *  - a + (a * -b)
 *
 * This final expression can be implemented as a single MAD(a, -b, a)
 * instruction.
 */
bool
glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
{
   const int other_operand = 1 - try_operand;
   st_src_reg a, b;

   ir_expression *expr = ir->operands[try_operand]->as_expression();
   if (!expr || expr->operation != ir_unop_logic_not)
      return false;

   ir->operands[other_operand]->accept(this);
   a = this->result;
   expr->operands[0]->accept(this);
   b = this->result;

   b.negate = ~b.negate;

   this->result = get_temp(ir->type);
   emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);

   return true;
}

bool
glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
{
   /* Saturates were only introduced to vertex programs in
    * NV_vertex_program3, so don't give them to drivers in the VP.
    */
   if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
      return false;

   ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
   if (!sat_src)
      return false;

   sat_src->accept(this);
   st_src_reg src = this->result;

   /* If we generated an expression instruction into a temporary in
    * processing the saturate's operand, apply the saturate to that
    * instruction. Otherwise, generate a MOV to do the saturate.
    *
    * Note that we have to be careful to only do this optimization if
    * the instruction in question was what generated src->result. For
    * example, ir_dereference_array might generate a MUL instruction
    * to create the reladdr, and return us a src reg using that
    * reladdr. That MUL result is not the value we're trying to
    * saturate.
    */
   ir_expression *sat_src_expr = sat_src->as_expression();
   if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
                        sat_src_expr->operation == ir_binop_add ||
                        sat_src_expr->operation == ir_binop_dot)) {
      glsl_to_tgsi_instruction *new_inst;
      new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
      new_inst->saturate = true;
   } else {
      this->result = get_temp(ir->type);
      st_dst_reg result_dst = st_dst_reg(this->result);
      result_dst.writemask = (1 << ir->type->vector_elements) - 1;
      glsl_to_tgsi_instruction *inst;
      inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
      inst->saturate = true;
   }

   return true;
}

void
glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
                                      st_src_reg *reg, int *num_reladdr)
{
   if (!reg->reladdr)
      return;

   emit_arl(ir, address_reg, *reg->reladdr);

   if (*num_reladdr != 1) {
      st_src_reg temp = get_temp(glsl_type::vec4_type);

      emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
      *reg = temp;
   }

   (*num_reladdr)--;
}

void
glsl_to_tgsi_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   st_src_reg op[Elements(ir->operands)];
   st_src_reg result_src;
   st_dst_reg result_dst;

   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
    */
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 1))
         return;
      if (try_emit_mad(ir, 0))
         return;
   }

   /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b))
    */
   if (ir->operation == ir_binop_logic_and) {
      if (try_emit_mad_for_and_not(ir, 1))
         return;
      if (try_emit_mad_for_and_not(ir, 0))
         return;
   }

   if (try_emit_sat(ir))
      return;

   if (ir->operation == ir_quadop_vector)
      assert(!"ir_quadop_vector should have been lowered");

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      if (this->result.file == PROGRAM_UNDEFINED) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1]) {
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }

   this->result.file = PROGRAM_UNDEFINED;

   /* Storage for our result. Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   /* convenience for the emit functions below. */
   result_dst = st_dst_reg(result_src);
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      if (result_dst.type != GLSL_TYPE_FLOAT)
         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
      else {
         /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
          * older GPUs implement SEQ using multiple instructions (i915 uses two
          * SGE instructions and a MUL instruction). Since our logic values are
          * 0.0 and 1.0, 1-x also implements !x.
          */
         op[0].negate = ~op[0].negate;
         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
      }
      break;
   case ir_unop_neg:
      if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
      else {
         op[0].negate = ~op[0].negate;
         result_src = op[0];
      }
      break;
   case ir_unop_abs:
      emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
      break;
   case ir_unop_sign:
      emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
      break;
   case ir_unop_rcp:
      emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
      break;

   case ir_unop_exp2:
      emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_log2:
      emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
      break;
   case ir_unop_sin:
      emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos:
      emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
      break;
   case ir_unop_sin_reduced:
      emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
      break;
   case ir_unop_cos_reduced:
      emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
      break;

   case ir_unop_dFdx:
      emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
      break;
   case ir_unop_dFdy:
   {
      /* The X component contains 1 or -1 depending on whether the framebuffer
       * is an FBO or the window system buffer, respectively.
       * It is then multiplied with the source operand of DDY.
       */
      static const gl_state_index transform_y_state[STATE_LENGTH]
         = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };

      unsigned transform_y_index =
         _mesa_add_state_reference(this->prog->Parameters,
                                   transform_y_state);

      st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
                                          transform_y_index,
                                          glsl_type::vec4_type);
      transform_y.swizzle = SWIZZLE_XXXX;

      st_src_reg temp = get_temp(glsl_type::vec4_type);

      emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
      emit(ir, TGSI_OPCODE_DDY, result_dst, temp);
      break;
   }

   case ir_unop_noise: {
      /* At some point, a motivated person could add a better
       * implementation of noise. Currently not even the nvidia
       * binary drivers do anything more than this. In any case, the
       * place to do this is in the GL state tracker, not the poor
       * driver.
       */
      emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
      break;
   }

   case ir_binop_add:
      emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
      break;
   case ir_binop_sub:
      emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
      break;

   case ir_binop_mul:
      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
      if (result_dst.type == GLSL_TYPE_FLOAT)
         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      else
         emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
      break;
   case ir_binop_mod:
      if (result_dst.type == GLSL_TYPE_FLOAT)
         assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      else
         emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
      break;

   case ir_binop_less:
      emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
      break;
   case ir_binop_greater:
      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
      break;
   case ir_binop_lequal:
      emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
      break;
   case ir_binop_gequal:
      emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
      break;
   case ir_binop_equal:
      emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
      break;
   case ir_binop_nequal:
      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
      break;
   case ir_binop_all_equal:
      /* "==" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         st_src_reg temp = get_temp(native_integers ?
            glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
            glsl_type::vec4_type);

         if (native_integers) {
            st_dst_reg temp_dst = st_dst_reg(temp);
            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);

            emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);

            /* Emit 1-3 AND operations to combine the SEQ results. */
            switch (ir->operands[0]->type->vector_elements) {
            case 2:
               break;
            case 3:
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_YYYY;
               temp2.swizzle = SWIZZLE_ZZZZ;
               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
               break;
            case 4:
               temp_dst.writemask = WRITEMASK_X;
               temp1.swizzle = SWIZZLE_XXXX;
               temp2.swizzle = SWIZZLE_YYYY;
               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_ZZZZ;
               temp2.swizzle = SWIZZLE_WWWW;
               emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
            }

            temp1.swizzle = SWIZZLE_XXXX;
            temp2.swizzle = SWIZZLE_YYYY;
            emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
         } else {
            emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);

            /* After the dot-product, the value will be an integer on the
             * range [0,4]. Zero becomes 1.0, and positive values become zero.
             */
            emit_dp(ir, result_dst, temp, temp, vector_elements);

            /* Negating the result of the dot-product gives values on the range
             * [-4, 0]. Zero becomes 1.0, and negative values become zero.
             * This is achieved using SGE.
             */
            st_src_reg sge_src = result_src;
            sge_src.negate = ~sge_src.negate;
            emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
         }
      } else {
         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
      }
      break;
   case ir_binop_any_nequal:
      /* "!=" operator producing a scalar boolean. */
      if (ir->operands[0]->type->is_vector() ||
          ir->operands[1]->type->is_vector()) {
         st_src_reg temp = get_temp(native_integers ?
            glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
            glsl_type::vec4_type);
         emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);

         if (native_integers) {
            st_dst_reg temp_dst = st_dst_reg(temp);
            st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);

            /* Emit 1-3 OR operations to combine the SNE results. */
            switch (ir->operands[0]->type->vector_elements) {
            case 2:
               break;
            case 3:
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_YYYY;
               temp2.swizzle = SWIZZLE_ZZZZ;
               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
               break;
            case 4:
               temp_dst.writemask = WRITEMASK_X;
               temp1.swizzle = SWIZZLE_XXXX;
               temp2.swizzle = SWIZZLE_YYYY;
               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
               temp_dst.writemask = WRITEMASK_Y;
               temp1.swizzle = SWIZZLE_ZZZZ;
               temp2.swizzle = SWIZZLE_WWWW;
               emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
            }

            temp1.swizzle = SWIZZLE_XXXX;
            temp2.swizzle = SWIZZLE_YYYY;
            emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
         } else {
            /* After the dot-product, the value will be an integer on the
             * range [0,4]. Zero stays zero, and positive values become 1.0.
             */
            glsl_to_tgsi_instruction *const dp =
               emit_dp(ir, result_dst, temp, temp, vector_elements);
            if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
               /* The clamping to [0,1] can be done for free in the fragment
                * shader with a saturate.
                */
               dp->saturate = true;
            } else {
               /* Negating the result of the dot-product gives values on the range
                * [-4, 0]. Zero stays zero, and negative values become 1.0. This
                * is achieved using SLT.
                */
               st_src_reg slt_src = result_src;
               slt_src.negate = ~slt_src.negate;
               emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
            }
         }
      } else {
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
      }
      break;

   case ir_unop_any: {
      assert(ir->operands[0]->type->is_vector());

      /* After the dot-product, the value will be an integer on the
       * range [0,4]. Zero stays zero, and positive values become 1.0.
       */
      glsl_to_tgsi_instruction *const dp =
         emit_dp(ir, result_dst, op[0], op[0],
                 ir->operands[0]->type->vector_elements);
      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
          result_dst.type == GLSL_TYPE_FLOAT) {
         /* The clamping to [0,1] can be done for free in the fragment
          * shader with a saturate.
          */
         dp->saturate = true;
      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
         /* Negating the result of the dot-product gives values on the range
          * [-4, 0]. Zero stays zero, and negative values become 1.0. This
          * is achieved using SLT.
          */
         st_src_reg slt_src = result_src;
         slt_src.negate = ~slt_src.negate;
         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
      }
      else {
         /* Use SNE 0 if integers are being used as boolean values. */
         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
      }
      break;
   }

   case ir_binop_logic_xor:
      if (native_integers)
         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
      else
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
      break;

   case ir_binop_logic_or: {
      if (native_integers) {
         /* If integers are used as booleans, we can use an actual "or"
          * instruction.
          */
         assert(native_integers);
         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
      } else {
         /* After the addition, the value will be an integer on the
          * range [0,2]. Zero stays zero, and positive values become 1.0.
          */
         glsl_to_tgsi_instruction *add =
            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
            /* The clamping to [0,1] can be done for free in the fragment
             * shader with a saturate if floats are being used as boolean values.
             */
            add->saturate = true;
         } else {
            /* Negating the result of the addition gives values on the range
             * [-2, 0]. Zero stays zero, and negative values become 1.0. This
             * is achieved using SLT.
             */
            st_src_reg slt_src = result_src;
            slt_src.negate = ~slt_src.negate;
            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
         }
      }
      break;
   }

   case ir_binop_logic_and:
      /* If native integers are disabled, the bool args are stored as float 0.0
       * or 1.0, so "mul" gives us "and". If they're enabled, just use the
       * actual AND opcode.
       */
      if (native_integers)
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
      else
         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
      break;

   case ir_binop_dot:
      assert(ir->operands[0]->type->is_vector());
      assert(ir->operands[0]->type == ir->operands[1]->type);
      emit_dp(ir, result_dst, op[0], op[1],
              ir->operands[0]->type->vector_elements);
      break;

   case ir_unop_sqrt:
      /* sqrt(x) = x * rsq(x). */
      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
      emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
      /* For incoming channels <= 0, set the result to 0. */
      op[0].negate = ~op[0].negate;
      emit(ir, TGSI_OPCODE_CMP, result_dst,
           op[0], result_src, st_src_reg_for_float(0.0));
      break;
   case ir_unop_rsq:
      emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
      break;
   case ir_unop_i2f:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
         break;
      }
      /* fallthrough to next case otherwise */
   case ir_unop_b2f:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
         break;
      }
      /* fallthrough to next case otherwise */
   case ir_unop_i2u:
   case ir_unop_u2i:
      /* Converting between signed and unsigned integers is a no-op. */
      result_src = op[0];
      break;
   case ir_unop_b2i:
      if (native_integers) {
         /* Booleans are stored as integers using ~0 for true and 0 for false.
          * GLSL requires that int(bool) return 1 for true and 0 for false.
          * This conversion is done with AND, but it could be done with NEG.
          */
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
      } else {
         /* Booleans and integers are both stored as floats when native
          * integers are disabled.
          */
         result_src = op[0];
      }
      break;
   case ir_unop_f2i:
      if (native_integers)
         emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
      else
         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_f2u:
      if (native_integers)
         emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
      else
         emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_u2f:
      result_src = op[0];
      break;
   case ir_unop_f2b:
      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
      break;
   case ir_unop_i2b:
      if (native_integers)
         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
      else
         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
      break;
   case ir_unop_trunc:
      emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
      break;
   case ir_unop_ceil:
      emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
      break;
   case ir_unop_floor:
      emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
      break;
   case ir_unop_round_even:
      emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
      break;
   case ir_unop_fract:
      emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
      break;

   case ir_binop_min:
      emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
      break;
   case ir_binop_max:
      emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
      break;
   case ir_binop_pow:
      emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
      break;

   case ir_unop_bit_not:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
         break;
      }
   case ir_unop_u2f:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
         break;
      }
   case ir_binop_lshift:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
         break;
      }
   case ir_binop_rshift:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
         break;
      }
   case ir_binop_bit_and:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
         break;
      }
   case ir_binop_bit_xor:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
         break;
      }
   case ir_binop_bit_or:
      if (native_integers) {
         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
         break;
      }

      assert(!"GLSL 1.30 features unsupported");
      break;

   case ir_binop_ubo_load:
      assert(!"not yet supported");
      break;

   case ir_quadop_vector:
      /* This operation should have already been handled.
       */
      assert(!"Should not get here.");
      break;
   }

   this->result = result_src;
}


void
glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
{
   st_src_reg src;
   int i;
   int swizzle[4];

   /* Note that this is only swizzles in expressions, not those on the left
    * hand side of an assignment, which do write masking. See ir_assignment
    * for that.
1904 */ 1905 1906 ir->val->accept(this); 1907 src = this->result; 1908 assert(src.file != PROGRAM_UNDEFINED); 1909 1910 for (i = 0; i < 4; i++) { 1911 if (i < ir->type->vector_elements) { 1912 switch (i) { 1913 case 0: 1914 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1915 break; 1916 case 1: 1917 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1918 break; 1919 case 2: 1920 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1921 break; 1922 case 3: 1923 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1924 break; 1925 } 1926 } else { 1927 /* If the type is smaller than a vec4, replicate the last 1928 * channel out. 1929 */ 1930 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1931 } 1932 } 1933 1934 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1935 1936 this->result = src; 1937 } 1938 1939 void 1940 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 1941 { 1942 variable_storage *entry = find_variable_storage(ir->var); 1943 ir_variable *var = ir->var; 1944 1945 if (!entry) { 1946 switch (var->mode) { 1947 case ir_var_uniform: 1948 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1949 var->location); 1950 this->variables.push_tail(entry); 1951 break; 1952 case ir_var_in: 1953 case ir_var_inout: 1954 /* The linker assigns locations for varyings and attributes, 1955 * including deprecated builtins (like gl_Color), user-assign 1956 * generic attributes (glBindVertexLocation), and 1957 * user-defined varyings. 1958 * 1959 * FINISHME: We would hit this path for function arguments. Fix! 1960 */ 1961 assert(var->location != -1); 1962 entry = new(mem_ctx) variable_storage(var, 1963 PROGRAM_INPUT, 1964 var->location); 1965 break; 1966 case ir_var_out: 1967 assert(var->location != -1); 1968 entry = new(mem_ctx) variable_storage(var, 1969 PROGRAM_OUTPUT, 1970 var->location + var->index); 1971 break; 1972 case ir_var_system_value: 1973 entry = new(mem_ctx) variable_storage(var, 1974 PROGRAM_SYSTEM_VALUE, 1975 var->location); 1976 break; 1977 case ir_var_auto: 1978 case ir_var_temporary: 1979 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1980 this->next_temp); 1981 this->variables.push_tail(entry); 1982 1983 next_temp += type_size(var->type); 1984 break; 1985 } 1986 1987 if (!entry) { 1988 printf("Failed to make storage for %s\n", var->name); 1989 exit(1); 1990 } 1991 } 1992 1993 this->result = st_src_reg(entry->file, entry->index, var->type); 1994 if (!native_integers) 1995 this->result.type = GLSL_TYPE_FLOAT; 1996 } 1997 1998 void 1999 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 2000 { 2001 ir_constant *index; 2002 st_src_reg src; 2003 int element_size = type_size(ir->type); 2004 2005 index = ir->array_index->constant_expression_value(); 2006 2007 ir->array->accept(this); 2008 src = this->result; 2009 2010 if (index) { 2011 src.index += index->value.i[0] * element_size; 2012 } else { 2013 /* Variable index array dereference. It eats the "vec4" of the 2014 * base of the array and an index that offsets the TGSI register 2015 * index. 2016 */ 2017 ir->array_index->accept(this); 2018 2019 st_src_reg index_reg; 2020 2021 if (element_size == 1) { 2022 index_reg = this->result; 2023 } else { 2024 index_reg = get_temp(native_integers ? 
2025 glsl_type::int_type : glsl_type::float_type); 2026 2027 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 2028 this->result, st_src_reg_for_type(index_reg.type, element_size)); 2029 } 2030 2031 /* If there was already a relative address register involved, add the 2032 * new and the old together to get the new offset. 2033 */ 2034 if (src.reladdr != NULL) { 2035 st_src_reg accum_reg = get_temp(native_integers ? 2036 glsl_type::int_type : glsl_type::float_type); 2037 2038 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 2039 index_reg, *src.reladdr); 2040 2041 index_reg = accum_reg; 2042 } 2043 2044 src.reladdr = ralloc(mem_ctx, st_src_reg); 2045 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 2046 } 2047 2048 /* If the type is smaller than a vec4, replicate the last channel out. */ 2049 if (ir->type->is_scalar() || ir->type->is_vector()) 2050 src.swizzle = swizzle_for_size(ir->type->vector_elements); 2051 else 2052 src.swizzle = SWIZZLE_NOOP; 2053 2054 /* Change the register type to the element type of the array. */ 2055 src.type = ir->type->base_type; 2056 2057 this->result = src; 2058 } 2059 2060 void 2061 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 2062 { 2063 unsigned int i; 2064 const glsl_type *struct_type = ir->record->type; 2065 int offset = 0; 2066 2067 ir->record->accept(this); 2068 2069 for (i = 0; i < struct_type->length; i++) { 2070 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) 2071 break; 2072 offset += type_size(struct_type->fields.structure[i].type); 2073 } 2074 2075 /* If the type is smaller than a vec4, replicate the last channel out. */ 2076 if (ir->type->is_scalar() || ir->type->is_vector()) 2077 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 2078 else 2079 this->result.swizzle = SWIZZLE_NOOP; 2080 2081 this->result.index += offset; 2082 this->result.type = ir->type->base_type; 2083 } 2084 2085 /** 2086 * We want to be careful in assignment setup to hit the actual storage 2087 * instead of potentially using a temporary like we might with the 2088 * ir_dereference handler. 2089 */ 2090 static st_dst_reg 2091 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v) 2092 { 2093 /* The LHS must be a dereference. If the LHS is a variable indexed array 2094 * access of a vector, it must be separated into a series conditional moves 2095 * before reaching this point (see ir_vec_index_to_cond_assign). 2096 */ 2097 assert(ir->as_dereference()); 2098 ir_dereference_array *deref_array = ir->as_dereference_array(); 2099 if (deref_array) { 2100 assert(!deref_array->array->type->is_vector()); 2101 } 2102 2103 /* Use the rvalue deref handler for the most part. We'll ignore 2104 * swizzles in it and write swizzles using writemask, though. 2105 */ 2106 ir->accept(v); 2107 return st_dst_reg(v->result); 2108 } 2109 2110 /** 2111 * Process the condition of a conditional assignment 2112 * 2113 * Examines the condition of a conditional assignment to generate the optimal 2114 * first operand of a \c CMP instruction. If the condition is a relational 2115 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 2116 * used as the source for the \c CMP instruction. Otherwise the comparison 2117 * is processed to a boolean result, and the boolean result is used as the 2118 * operand to the CMP instruction. 
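 *
 * For example, for `cond ? b : c` with `cond = (x >= 0.0)`, the value `x` can
 * feed CMP directly (with the appropriate negation and/or operand swap)
 * instead of first materializing a 0.0/1.0 boolean with a separate
 * comparison instruction.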
 */
bool
glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
{
   ir_rvalue *src_ir = ir;
   bool negate = true;
   bool switch_order = false;

   ir_expression *const expr = ir->as_expression();
   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
      bool zero_on_left = false;

      if (expr->operands[0]->is_zero()) {
         src_ir = expr->operands[1];
         zero_on_left = true;
      } else if (expr->operands[1]->is_zero()) {
         src_ir = expr->operands[0];
         zero_on_left = false;
      }

      /*      a is -  0  +            -  0  +
       * (a <  0)    T  F  F  ( a < 0)  T  F  F
       * (0 <  a)    F  F  T  (-a < 0)  F  F  T
       * (a <= 0)    T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
       * (0 <= a)    F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
       * (a >  0)    F  F  T  (-a < 0)  F  F  T
       * (0 >  a)    T  F  F  ( a < 0)  T  F  F
       * (a >= 0)    F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
       * (0 >= a)    T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
       *
       * Note that exchanging the order of 0 and 'a' in the comparison simply
       * means that the value of 'a' should be negated.
       */
      if (src_ir != ir) {
         switch (expr->operation) {
         case ir_binop_less:
            switch_order = false;
            negate = zero_on_left;
            break;

         case ir_binop_greater:
            switch_order = false;
            negate = !zero_on_left;
            break;

         case ir_binop_lequal:
            switch_order = true;
            negate = !zero_on_left;
            break;

         case ir_binop_gequal:
            switch_order = true;
            negate = zero_on_left;
            break;

         default:
            /* This isn't the right kind of comparison after all, so make sure
             * the whole condition is visited.
             */
            src_ir = ir;
            break;
         }
      }
   }

   src_ir->accept(this);

   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
    * computing the condition.
    */
   if (negate)
      this->result.negate = ~this->result.negate;

   return switch_order;
}

void
glsl_to_tgsi_visitor::visit(ir_assignment *ir)
{
   st_dst_reg l;
   st_src_reg r;
   int i;

   ir->rhs->accept(this);
   r = this->result;

   l = get_assignment_lhs(ir->lhs, this);

   /* FINISHME: This should really be set to the correct maximal writemask for
    * FINISHME: each component written (in the loops below).  This case can
    * FINISHME: only occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      l.writemask = WRITEMASK_XYZW;
   } else if (ir->lhs->type->is_scalar() &&
              ir->lhs->variable_referenced()->mode == ir_var_out) {
      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
       * FINISHME: W component of fragment shader output zero, work correctly.
       */
      l.writemask = WRITEMASK_XYZW;
   } else {
      int swizzles[4];
      int first_enabled_chan = 0;
      int rhs_chan = 0;

      l.writemask = ir->write_mask;

      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i)) {
            first_enabled_chan = GET_SWZ(r.swizzle, i);
            break;
         }
      }

      /* Swizzle a small RHS vector into the channels being written.
2237 * 2238 * glsl ir treats write_mask as dictating how many channels are 2239 * present on the RHS while TGSI treats write_mask as just 2240 * showing which channels of the vec4 RHS get written. 2241 */ 2242 for (int i = 0; i < 4; i++) { 2243 if (l.writemask & (1 << i)) 2244 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 2245 else 2246 swizzles[i] = first_enabled_chan; 2247 } 2248 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 2249 swizzles[2], swizzles[3]); 2250 } 2251 2252 assert(l.file != PROGRAM_UNDEFINED); 2253 assert(r.file != PROGRAM_UNDEFINED); 2254 2255 if (ir->condition) { 2256 const bool switch_order = this->process_move_condition(ir->condition); 2257 st_src_reg condition = this->result; 2258 2259 for (i = 0; i < type_size(ir->lhs->type); i++) { 2260 st_src_reg l_src = st_src_reg(l); 2261 st_src_reg condition_temp = condition; 2262 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements); 2263 2264 if (native_integers) { 2265 /* This is necessary because TGSI's CMP instruction expects the 2266 * condition to be a float, and we store booleans as integers. 2267 * If TGSI had a UCMP instruction or similar, this extra 2268 * instruction would not be necessary. 2269 */ 2270 condition_temp = get_temp(glsl_type::vec4_type); 2271 condition.negate = 0; 2272 emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); 2273 condition_temp.swizzle = condition.swizzle; 2274 } 2275 2276 if (switch_order) { 2277 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r); 2278 } else { 2279 emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src); 2280 } 2281 2282 l.index++; 2283 r.index++; 2284 } 2285 } else if (ir->rhs->as_expression() && 2286 this->instructions.get_tail() && 2287 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 2288 type_size(ir->lhs->type) == 1 && 2289 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) { 2290 /* To avoid emitting an extra MOV when assigning an expression to a 2291 * variable, emit the last instruction of the expression again, but 2292 * replace the destination register with the target of the assignment. 2293 * Dead code elimination will remove the original instruction. 2294 */ 2295 glsl_to_tgsi_instruction *inst, *new_inst; 2296 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2297 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]); 2298 new_inst->saturate = inst->saturate; 2299 inst->dead_mask = inst->dst.writemask; 2300 } else { 2301 for (i = 0; i < type_size(ir->lhs->type); i++) { 2302 if (ir->rhs->type->is_array()) 2303 r.type = ir->rhs->type->element_type()->base_type; 2304 else if (ir->rhs->type->is_record()) 2305 r.type = ir->rhs->type->fields.structure[i].type->base_type; 2306 emit(ir, TGSI_OPCODE_MOV, l, r); 2307 l.index++; 2308 r.index++; 2309 } 2310 } 2311 } 2312 2313 2314 void 2315 glsl_to_tgsi_visitor::visit(ir_constant *ir) 2316 { 2317 st_src_reg src; 2318 GLfloat stack_vals[4] = { 0 }; 2319 gl_constant_value *values = (gl_constant_value *) stack_vals; 2320 GLenum gl_type = GL_NONE; 2321 unsigned int i; 2322 static int in_array = 0; 2323 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 2324 2325 /* Unfortunately, 4 floats is all we can get into 2326 * _mesa_add_typed_unnamed_constant. So, make a temp to store an 2327 * aggregate constant and move each constant value into it. If we 2328 * get lucky, copy propagation will eliminate the extra moves. 
2329 */ 2330 if (ir->type->base_type == GLSL_TYPE_STRUCT) { 2331 st_src_reg temp_base = get_temp(ir->type); 2332 st_dst_reg temp = st_dst_reg(temp_base); 2333 2334 foreach_iter(exec_list_iterator, iter, ir->components) { 2335 ir_constant *field_value = (ir_constant *)iter.get(); 2336 int size = type_size(field_value->type); 2337 2338 assert(size > 0); 2339 2340 field_value->accept(this); 2341 src = this->result; 2342 2343 for (i = 0; i < (unsigned int)size; i++) { 2344 emit(ir, TGSI_OPCODE_MOV, temp, src); 2345 2346 src.index++; 2347 temp.index++; 2348 } 2349 } 2350 this->result = temp_base; 2351 return; 2352 } 2353 2354 if (ir->type->is_array()) { 2355 st_src_reg temp_base = get_temp(ir->type); 2356 st_dst_reg temp = st_dst_reg(temp_base); 2357 int size = type_size(ir->type->fields.array); 2358 2359 assert(size > 0); 2360 in_array++; 2361 2362 for (i = 0; i < ir->type->length; i++) { 2363 ir->array_elements[i]->accept(this); 2364 src = this->result; 2365 for (int j = 0; j < size; j++) { 2366 emit(ir, TGSI_OPCODE_MOV, temp, src); 2367 2368 src.index++; 2369 temp.index++; 2370 } 2371 } 2372 this->result = temp_base; 2373 in_array--; 2374 return; 2375 } 2376 2377 if (ir->type->is_matrix()) { 2378 st_src_reg mat = get_temp(ir->type); 2379 st_dst_reg mat_column = st_dst_reg(mat); 2380 2381 for (i = 0; i < ir->type->matrix_columns; i++) { 2382 assert(ir->type->base_type == GLSL_TYPE_FLOAT); 2383 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 2384 2385 src = st_src_reg(file, -1, ir->type->base_type); 2386 src.index = add_constant(file, 2387 values, 2388 ir->type->vector_elements, 2389 GL_FLOAT, 2390 &src.swizzle); 2391 emit(ir, TGSI_OPCODE_MOV, mat_column, src); 2392 2393 mat_column.index++; 2394 } 2395 2396 this->result = mat; 2397 return; 2398 } 2399 2400 switch (ir->type->base_type) { 2401 case GLSL_TYPE_FLOAT: 2402 gl_type = GL_FLOAT; 2403 for (i = 0; i < ir->type->vector_elements; i++) { 2404 values[i].f = ir->value.f[i]; 2405 } 2406 break; 2407 case GLSL_TYPE_UINT: 2408 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 2409 for (i = 0; i < ir->type->vector_elements; i++) { 2410 if (native_integers) 2411 values[i].u = ir->value.u[i]; 2412 else 2413 values[i].f = ir->value.u[i]; 2414 } 2415 break; 2416 case GLSL_TYPE_INT: 2417 gl_type = native_integers ? GL_INT : GL_FLOAT; 2418 for (i = 0; i < ir->type->vector_elements; i++) { 2419 if (native_integers) 2420 values[i].i = ir->value.i[i]; 2421 else 2422 values[i].f = ir->value.i[i]; 2423 } 2424 break; 2425 case GLSL_TYPE_BOOL: 2426 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 2427 for (i = 0; i < ir->type->vector_elements; i++) { 2428 if (native_integers) 2429 values[i].u = ir->value.b[i] ? 
~0 : 0; 2430 else 2431 values[i].f = ir->value.b[i]; 2432 } 2433 break; 2434 default: 2435 assert(!"Non-float/uint/int/bool constant"); 2436 } 2437 2438 this->result = st_src_reg(file, -1, ir->type); 2439 this->result.index = add_constant(file, 2440 values, 2441 ir->type->vector_elements, 2442 gl_type, 2443 &this->result.swizzle); 2444 } 2445 2446 function_entry * 2447 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) 2448 { 2449 function_entry *entry; 2450 2451 foreach_iter(exec_list_iterator, iter, this->function_signatures) { 2452 entry = (function_entry *)iter.get(); 2453 2454 if (entry->sig == sig) 2455 return entry; 2456 } 2457 2458 entry = ralloc(mem_ctx, function_entry); 2459 entry->sig = sig; 2460 entry->sig_id = this->next_signature_id++; 2461 entry->bgn_inst = NULL; 2462 2463 /* Allocate storage for all the parameters. */ 2464 foreach_iter(exec_list_iterator, iter, sig->parameters) { 2465 ir_variable *param = (ir_variable *)iter.get(); 2466 variable_storage *storage; 2467 2468 storage = find_variable_storage(param); 2469 assert(!storage); 2470 2471 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, 2472 this->next_temp); 2473 this->variables.push_tail(storage); 2474 2475 this->next_temp += type_size(param->type); 2476 } 2477 2478 if (!sig->return_type->is_void()) { 2479 entry->return_reg = get_temp(sig->return_type); 2480 } else { 2481 entry->return_reg = undef_src; 2482 } 2483 2484 this->function_signatures.push_tail(entry); 2485 return entry; 2486 } 2487 2488 void 2489 glsl_to_tgsi_visitor::visit(ir_call *ir) 2490 { 2491 glsl_to_tgsi_instruction *call_inst; 2492 ir_function_signature *sig = ir->callee; 2493 function_entry *entry = get_function_signature(sig); 2494 int i; 2495 2496 /* Process in parameters. */ 2497 exec_list_iterator sig_iter = sig->parameters.iterator(); 2498 foreach_iter(exec_list_iterator, iter, *ir) { 2499 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2500 ir_variable *param = (ir_variable *)sig_iter.get(); 2501 2502 if (param->mode == ir_var_in || 2503 param->mode == ir_var_inout) { 2504 variable_storage *storage = find_variable_storage(param); 2505 assert(storage); 2506 2507 param_rval->accept(this); 2508 st_src_reg r = this->result; 2509 2510 st_dst_reg l; 2511 l.file = storage->file; 2512 l.index = storage->index; 2513 l.reladdr = NULL; 2514 l.writemask = WRITEMASK_XYZW; 2515 l.cond_mask = COND_TR; 2516 2517 for (i = 0; i < type_size(param->type); i++) { 2518 emit(ir, TGSI_OPCODE_MOV, l, r); 2519 l.index++; 2520 r.index++; 2521 } 2522 } 2523 2524 sig_iter.next(); 2525 } 2526 assert(!sig_iter.has_next()); 2527 2528 /* Emit call instruction */ 2529 call_inst = emit(ir, TGSI_OPCODE_CAL); 2530 call_inst->function = entry; 2531 2532 /* Process out parameters. 
*/ 2533 sig_iter = sig->parameters.iterator(); 2534 foreach_iter(exec_list_iterator, iter, *ir) { 2535 ir_rvalue *param_rval = (ir_rvalue *)iter.get(); 2536 ir_variable *param = (ir_variable *)sig_iter.get(); 2537 2538 if (param->mode == ir_var_out || 2539 param->mode == ir_var_inout) { 2540 variable_storage *storage = find_variable_storage(param); 2541 assert(storage); 2542 2543 st_src_reg r; 2544 r.file = storage->file; 2545 r.index = storage->index; 2546 r.reladdr = NULL; 2547 r.swizzle = SWIZZLE_NOOP; 2548 r.negate = 0; 2549 2550 param_rval->accept(this); 2551 st_dst_reg l = st_dst_reg(this->result); 2552 2553 for (i = 0; i < type_size(param->type); i++) { 2554 emit(ir, TGSI_OPCODE_MOV, l, r); 2555 l.index++; 2556 r.index++; 2557 } 2558 } 2559 2560 sig_iter.next(); 2561 } 2562 assert(!sig_iter.has_next()); 2563 2564 /* Process return value. */ 2565 this->result = entry->return_reg; 2566 } 2567 2568 void 2569 glsl_to_tgsi_visitor::visit(ir_texture *ir) 2570 { 2571 st_src_reg result_src, coord, lod_info, projector, dx, dy, offset; 2572 st_dst_reg result_dst, coord_dst; 2573 glsl_to_tgsi_instruction *inst = NULL; 2574 unsigned opcode = TGSI_OPCODE_NOP; 2575 2576 if (ir->coordinate) { 2577 ir->coordinate->accept(this); 2578 2579 /* Put our coords in a temp. We'll need to modify them for shadow, 2580 * projection, or LOD, so the only case we'd use it as is is if 2581 * we're doing plain old texturing. The optimization passes on 2582 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 2583 */ 2584 coord = get_temp(glsl_type::vec4_type); 2585 coord_dst = st_dst_reg(coord); 2586 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2587 } 2588 2589 if (ir->projector) { 2590 ir->projector->accept(this); 2591 projector = this->result; 2592 } 2593 2594 /* Storage for our result. Ideally for an assignment we'd be using 2595 * the actual storage for the result here, instead. 2596 */ 2597 result_src = get_temp(ir->type); 2598 result_dst = st_dst_reg(result_src); 2599 2600 switch (ir->op) { 2601 case ir_tex: 2602 opcode = TGSI_OPCODE_TEX; 2603 break; 2604 case ir_txb: 2605 opcode = TGSI_OPCODE_TXB; 2606 ir->lod_info.bias->accept(this); 2607 lod_info = this->result; 2608 break; 2609 case ir_txl: 2610 opcode = TGSI_OPCODE_TXL; 2611 ir->lod_info.lod->accept(this); 2612 lod_info = this->result; 2613 break; 2614 case ir_txd: 2615 opcode = TGSI_OPCODE_TXD; 2616 ir->lod_info.grad.dPdx->accept(this); 2617 dx = this->result; 2618 ir->lod_info.grad.dPdy->accept(this); 2619 dy = this->result; 2620 break; 2621 case ir_txs: 2622 opcode = TGSI_OPCODE_TXQ; 2623 ir->lod_info.lod->accept(this); 2624 lod_info = this->result; 2625 break; 2626 case ir_txf: 2627 opcode = TGSI_OPCODE_TXF; 2628 ir->lod_info.lod->accept(this); 2629 lod_info = this->result; 2630 if (ir->offset) { 2631 ir->offset->accept(this); 2632 offset = this->result; 2633 } 2634 break; 2635 } 2636 2637 const glsl_type *sampler_type = ir->sampler->type; 2638 2639 if (ir->projector) { 2640 if (opcode == TGSI_OPCODE_TEX) { 2641 /* Slot the projector in as the last component of the coord. */ 2642 coord_dst.writemask = WRITEMASK_W; 2643 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector); 2644 coord_dst.writemask = WRITEMASK_XYZW; 2645 opcode = TGSI_OPCODE_TXP; 2646 } else { 2647 st_src_reg coord_w = coord; 2648 coord_w.swizzle = SWIZZLE_WWWW; 2649 2650 /* For the other TEX opcodes there's no projective version 2651 * since the last slot is taken up by LOD info. Do the 2652 * projective divide now. 
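             *
             * Roughly:
             *
             *   RCP coord.w, projector;
             *   MUL coord.xyz, coord, coord.wwww;
             *
             * i.e. the coordinate is divided by the projector before sampling.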
2653 */ 2654 coord_dst.writemask = WRITEMASK_W; 2655 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector); 2656 2657 /* In the case where we have to project the coordinates "by hand," 2658 * the shadow comparator value must also be projected. 2659 */ 2660 st_src_reg tmp_src = coord; 2661 if (ir->shadow_comparitor) { 2662 /* Slot the shadow value in as the second to last component of the 2663 * coord. 2664 */ 2665 ir->shadow_comparitor->accept(this); 2666 2667 tmp_src = get_temp(glsl_type::vec4_type); 2668 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 2669 2670 /* Projective division not allowed for array samplers. */ 2671 assert(!sampler_type->sampler_array); 2672 2673 tmp_dst.writemask = WRITEMASK_Z; 2674 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 2675 2676 tmp_dst.writemask = WRITEMASK_XY; 2677 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 2678 } 2679 2680 coord_dst.writemask = WRITEMASK_XYZ; 2681 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 2682 2683 coord_dst.writemask = WRITEMASK_XYZW; 2684 coord.swizzle = SWIZZLE_XYZW; 2685 } 2686 } 2687 2688 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 2689 * comparator was put in the correct place (and projected) by the code, 2690 * above, that handles by-hand projection. 2691 */ 2692 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 2693 /* Slot the shadow value in as the second to last component of the 2694 * coord. 2695 */ 2696 ir->shadow_comparitor->accept(this); 2697 2698 /* XXX This will need to be updated for cubemap array samplers. */ 2699 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2700 sampler_type->sampler_array) || 2701 sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { 2702 coord_dst.writemask = WRITEMASK_W; 2703 } else { 2704 coord_dst.writemask = WRITEMASK_Z; 2705 } 2706 2707 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 2708 coord_dst.writemask = WRITEMASK_XYZW; 2709 } 2710 2711 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 2712 opcode == TGSI_OPCODE_TXF) { 2713 /* TGSI stores LOD or LOD bias in the last channel of the coords. */ 2714 coord_dst.writemask = WRITEMASK_W; 2715 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 2716 coord_dst.writemask = WRITEMASK_XYZW; 2717 } 2718 2719 if (opcode == TGSI_OPCODE_TXD) 2720 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2721 else if (opcode == TGSI_OPCODE_TXQ) 2722 inst = emit(ir, opcode, result_dst, lod_info); 2723 else if (opcode == TGSI_OPCODE_TXF) { 2724 inst = emit(ir, opcode, result_dst, coord); 2725 } else 2726 inst = emit(ir, opcode, result_dst, coord); 2727 2728 if (ir->shadow_comparitor) 2729 inst->tex_shadow = GL_TRUE; 2730 2731 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler, 2732 this->shader_program, 2733 this->prog); 2734 2735 if (ir->offset) { 2736 inst->tex_offset_num_offset = 1; 2737 inst->tex_offsets[0].Index = offset.index; 2738 inst->tex_offsets[0].File = offset.file; 2739 inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0); 2740 inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1); 2741 inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2); 2742 } 2743 2744 switch (sampler_type->sampler_dimensionality) { 2745 case GLSL_SAMPLER_DIM_1D: 2746 inst->tex_target = (sampler_type->sampler_array) 2747 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2748 break; 2749 case GLSL_SAMPLER_DIM_2D: 2750 inst->tex_target = (sampler_type->sampler_array) 2751 ? 
TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2752 break; 2753 case GLSL_SAMPLER_DIM_3D: 2754 inst->tex_target = TEXTURE_3D_INDEX; 2755 break; 2756 case GLSL_SAMPLER_DIM_CUBE: 2757 inst->tex_target = TEXTURE_CUBE_INDEX; 2758 break; 2759 case GLSL_SAMPLER_DIM_RECT: 2760 inst->tex_target = TEXTURE_RECT_INDEX; 2761 break; 2762 case GLSL_SAMPLER_DIM_BUF: 2763 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2764 break; 2765 case GLSL_SAMPLER_DIM_EXTERNAL: 2766 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 2767 break; 2768 default: 2769 assert(!"Should not get here."); 2770 } 2771 2772 this->result = result_src; 2773 } 2774 2775 void 2776 glsl_to_tgsi_visitor::visit(ir_return *ir) 2777 { 2778 if (ir->get_value()) { 2779 st_dst_reg l; 2780 int i; 2781 2782 assert(current_function); 2783 2784 ir->get_value()->accept(this); 2785 st_src_reg r = this->result; 2786 2787 l = st_dst_reg(current_function->return_reg); 2788 2789 for (i = 0; i < type_size(current_function->sig->return_type); i++) { 2790 emit(ir, TGSI_OPCODE_MOV, l, r); 2791 l.index++; 2792 r.index++; 2793 } 2794 } 2795 2796 emit(ir, TGSI_OPCODE_RET); 2797 } 2798 2799 void 2800 glsl_to_tgsi_visitor::visit(ir_discard *ir) 2801 { 2802 if (ir->condition) { 2803 ir->condition->accept(this); 2804 this->result.negate = ~this->result.negate; 2805 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result); 2806 } else { 2807 emit(ir, TGSI_OPCODE_KILP); 2808 } 2809 } 2810 2811 void 2812 glsl_to_tgsi_visitor::visit(ir_if *ir) 2813 { 2814 glsl_to_tgsi_instruction *cond_inst, *if_inst; 2815 glsl_to_tgsi_instruction *prev_inst; 2816 2817 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2818 2819 ir->condition->accept(this); 2820 assert(this->result.file != PROGRAM_UNDEFINED); 2821 2822 if (this->options->EmitCondCodes) { 2823 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 2824 2825 /* See if we actually generated any instruction for generating 2826 * the condition. If not, then cook up a move to a temp so we 2827 * have something to set cond_update on. 
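       *
       * For example, `if (flag)` where `flag` is a plain boolean variable
       * emits no new instruction while visiting the condition, so a MOV into
       * a temporary is added just to carry the condition-code update.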
2828 */ 2829 if (cond_inst == prev_inst) { 2830 st_src_reg temp = get_temp(glsl_type::bool_type); 2831 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); 2832 } 2833 cond_inst->cond_update = GL_TRUE; 2834 2835 if_inst = emit(ir->condition, TGSI_OPCODE_IF); 2836 if_inst->dst.cond_mask = COND_NE; 2837 } else { 2838 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result); 2839 } 2840 2841 this->instructions.push_tail(if_inst); 2842 2843 visit_exec_list(&ir->then_instructions, this); 2844 2845 if (!ir->else_instructions.is_empty()) { 2846 emit(ir->condition, TGSI_OPCODE_ELSE); 2847 visit_exec_list(&ir->else_instructions, this); 2848 } 2849 2850 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF); 2851 } 2852 2853 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 2854 { 2855 result.file = PROGRAM_UNDEFINED; 2856 next_temp = 1; 2857 next_signature_id = 1; 2858 num_immediates = 0; 2859 current_function = NULL; 2860 num_address_regs = 0; 2861 samplers_used = 0; 2862 indirect_addr_temps = false; 2863 indirect_addr_consts = false; 2864 glsl_version = 0; 2865 native_integers = false; 2866 mem_ctx = ralloc_context(NULL); 2867 ctx = NULL; 2868 prog = NULL; 2869 shader_program = NULL; 2870 options = NULL; 2871 } 2872 2873 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 2874 { 2875 ralloc_free(mem_ctx); 2876 } 2877 2878 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 2879 { 2880 delete v; 2881 } 2882 2883 2884 /** 2885 * Count resources used by the given gpu program (number of texture 2886 * samplers, etc). 2887 */ 2888 static void 2889 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 2890 { 2891 v->samplers_used = 0; 2892 2893 foreach_iter(exec_list_iterator, iter, v->instructions) { 2894 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 2895 2896 if (is_tex_instruction(inst->op)) { 2897 v->samplers_used |= 1 << inst->sampler; 2898 2899 if (inst->tex_shadow) { 2900 prog->ShadowSamplers |= 1 << inst->sampler; 2901 } 2902 } 2903 } 2904 2905 prog->SamplersUsed = v->samplers_used; 2906 2907 if (v->shader_program != NULL) 2908 _mesa_update_shader_textures_used(v->shader_program, prog); 2909 } 2910 2911 static void 2912 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx, 2913 struct gl_shader_program *shader_program, 2914 const char *name, const glsl_type *type, 2915 ir_constant *val) 2916 { 2917 if (type->is_record()) { 2918 ir_constant *field_constant; 2919 2920 field_constant = (ir_constant *)val->components.get_head(); 2921 2922 for (unsigned int i = 0; i < type->length; i++) { 2923 const glsl_type *field_type = type->fields.structure[i].type; 2924 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, 2925 type->fields.structure[i].name); 2926 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name, 2927 field_type, field_constant); 2928 field_constant = (ir_constant *)field_constant->next; 2929 } 2930 return; 2931 } 2932 2933 unsigned offset; 2934 unsigned index = _mesa_get_uniform_location(ctx, shader_program, name, 2935 &offset); 2936 if (offset == GL_INVALID_INDEX) { 2937 fail_link(shader_program, 2938 "Couldn't find uniform for initializer %s\n", name); 2939 return; 2940 } 2941 int loc = _mesa_uniform_merge_location_offset(index, offset); 2942 2943 for (unsigned int i = 0; i < (type->is_array() ? 
type->length : 1); i++) { 2944 ir_constant *element; 2945 const glsl_type *element_type; 2946 if (type->is_array()) { 2947 element = val->array_elements[i]; 2948 element_type = type->fields.array; 2949 } else { 2950 element = val; 2951 element_type = type; 2952 } 2953 2954 void *values; 2955 2956 if (element_type->base_type == GLSL_TYPE_BOOL) { 2957 int *conv = ralloc_array(mem_ctx, int, element_type->components()); 2958 for (unsigned int j = 0; j < element_type->components(); j++) { 2959 conv[j] = element->value.b[j]; 2960 } 2961 values = (void *)conv; 2962 element_type = glsl_type::get_instance(GLSL_TYPE_INT, 2963 element_type->vector_elements, 2964 1); 2965 } else { 2966 values = &element->value; 2967 } 2968 2969 if (element_type->is_matrix()) { 2970 _mesa_uniform_matrix(ctx, shader_program, 2971 element_type->matrix_columns, 2972 element_type->vector_elements, 2973 loc, 1, GL_FALSE, (GLfloat *)values); 2974 } else { 2975 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns, 2976 values, element_type->gl_type); 2977 } 2978 2979 loc++; 2980 } 2981 } 2982 2983 /** 2984 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 2985 * are read from the given src in this instruction 2986 */ 2987 static int 2988 get_src_arg_mask(st_dst_reg dst, st_src_reg src) 2989 { 2990 int read_mask = 0, comp; 2991 2992 /* Now, given the src swizzle and the written channels, find which 2993 * components are actually read 2994 */ 2995 for (comp = 0; comp < 4; ++comp) { 2996 const unsigned coord = GET_SWZ(src.swizzle, comp); 2997 ASSERT(coord < 4); 2998 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 2999 read_mask |= 1 << coord; 3000 } 3001 3002 return read_mask; 3003 } 3004 3005 /** 3006 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 3007 * instruction is the first instruction to write to register T0. There are 3008 * several lowering passes done in GLSL IR (e.g. branches and 3009 * relative addressing) that create a large number of conditional assignments 3010 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 3011 * 3012 * Here is why this conversion is safe: 3013 * CMP T0, T1 T2 T0 can be expanded to: 3014 * if (T1 < 0.0) 3015 * MOV T0, T2; 3016 * else 3017 * MOV T0, T0; 3018 * 3019 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 3020 * as the original program. If (T1 < 0.0) evaluates to false, executing 3021 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 3022 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 3023 * because any instruction that was going to read from T0 after this was going 3024 * to read a garbage value anyway. 3025 */ 3026 void 3027 glsl_to_tgsi_visitor::simplify_cmp(void) 3028 { 3029 unsigned *tempWrites; 3030 unsigned outputWrites[MAX_PROGRAM_OUTPUTS]; 3031 3032 tempWrites = new unsigned[MAX_TEMPS]; 3033 if (!tempWrites) { 3034 return; 3035 } 3036 memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS); 3037 memset(outputWrites, 0, sizeof(outputWrites)); 3038 3039 foreach_iter(exec_list_iterator, iter, this->instructions) { 3040 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3041 unsigned prevWriteMask = 0; 3042 3043 /* Give up if we encounter relative addressing or flow control. 
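       * With an indirect destination (e.g. TEMP[ADDR[0].x]) or with branches,
       * which writes actually happen can no longer be tracked statically, so
       * the pass simply stops scanning at that point.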
*/ 3044 if (inst->dst.reladdr || 3045 tgsi_get_opcode_info(inst->op)->is_branch || 3046 inst->op == TGSI_OPCODE_BGNSUB || 3047 inst->op == TGSI_OPCODE_CONT || 3048 inst->op == TGSI_OPCODE_END || 3049 inst->op == TGSI_OPCODE_ENDSUB || 3050 inst->op == TGSI_OPCODE_RET) { 3051 break; 3052 } 3053 3054 if (inst->dst.file == PROGRAM_OUTPUT) { 3055 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS); 3056 prevWriteMask = outputWrites[inst->dst.index]; 3057 outputWrites[inst->dst.index] |= inst->dst.writemask; 3058 } else if (inst->dst.file == PROGRAM_TEMPORARY) { 3059 assert(inst->dst.index < MAX_TEMPS); 3060 prevWriteMask = tempWrites[inst->dst.index]; 3061 tempWrites[inst->dst.index] |= inst->dst.writemask; 3062 } 3063 3064 /* For a CMP to be considered a conditional write, the destination 3065 * register and source register two must be the same. */ 3066 if (inst->op == TGSI_OPCODE_CMP 3067 && !(inst->dst.writemask & prevWriteMask) 3068 && inst->src[2].file == inst->dst.file 3069 && inst->src[2].index == inst->dst.index 3070 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) { 3071 3072 inst->op = TGSI_OPCODE_MOV; 3073 inst->src[0] = inst->src[1]; 3074 } 3075 } 3076 3077 delete [] tempWrites; 3078 } 3079 3080 /* Replaces all references to a temporary register index with another index. */ 3081 void 3082 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index) 3083 { 3084 foreach_iter(exec_list_iterator, iter, this->instructions) { 3085 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3086 unsigned j; 3087 3088 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3089 if (inst->src[j].file == PROGRAM_TEMPORARY && 3090 inst->src[j].index == index) { 3091 inst->src[j].index = new_index; 3092 } 3093 } 3094 3095 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3096 inst->dst.index = new_index; 3097 } 3098 } 3099 } 3100 3101 int 3102 glsl_to_tgsi_visitor::get_first_temp_read(int index) 3103 { 3104 int depth = 0; /* loop depth */ 3105 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3106 unsigned i = 0, j; 3107 3108 foreach_iter(exec_list_iterator, iter, this->instructions) { 3109 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3110 3111 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3112 if (inst->src[j].file == PROGRAM_TEMPORARY && 3113 inst->src[j].index == index) { 3114 return (depth == 0) ? i : loop_start; 3115 } 3116 } 3117 3118 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3119 if(depth++ == 0) 3120 loop_start = i; 3121 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3122 if (--depth == 0) 3123 loop_start = -1; 3124 } 3125 assert(depth >= 0); 3126 3127 i++; 3128 } 3129 3130 return -1; 3131 } 3132 3133 int 3134 glsl_to_tgsi_visitor::get_first_temp_write(int index) 3135 { 3136 int depth = 0; /* loop depth */ 3137 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 3138 int i = 0; 3139 3140 foreach_iter(exec_list_iterator, iter, this->instructions) { 3141 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3142 3143 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) { 3144 return (depth == 0) ? 
i : loop_start; 3145 } 3146 3147 if (inst->op == TGSI_OPCODE_BGNLOOP) { 3148 if(depth++ == 0) 3149 loop_start = i; 3150 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 3151 if (--depth == 0) 3152 loop_start = -1; 3153 } 3154 assert(depth >= 0); 3155 3156 i++; 3157 } 3158 3159 return -1; 3160 } 3161 3162 int 3163 glsl_to_tgsi_visitor::get_last_temp_read(int index) 3164 { 3165 int depth = 0; /* loop depth */ 3166 int last = -1; /* index of last instruction that reads the temporary */ 3167 unsigned i = 0, j; 3168 3169 foreach_iter(exec_list_iterator, iter, this->instructions) { 3170 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3171 3172 for (j=0; j < num_inst_src_regs(inst->op); j++) { 3173 if (inst->src[j].file == PROGRAM_TEMPORARY && 3174 inst->src[j].index == index) { 3175 last = (depth == 0) ? i : -2; 3176 } 3177 } 3178 3179 if (inst->op == TGSI_OPCODE_BGNLOOP) 3180 depth++; 3181 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3182 if (--depth == 0 && last == -2) 3183 last = i; 3184 assert(depth >= 0); 3185 3186 i++; 3187 } 3188 3189 assert(last >= -1); 3190 return last; 3191 } 3192 3193 int 3194 glsl_to_tgsi_visitor::get_last_temp_write(int index) 3195 { 3196 int depth = 0; /* loop depth */ 3197 int last = -1; /* index of last instruction that writes to the temporary */ 3198 int i = 0; 3199 3200 foreach_iter(exec_list_iterator, iter, this->instructions) { 3201 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3202 3203 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) 3204 last = (depth == 0) ? i : -2; 3205 3206 if (inst->op == TGSI_OPCODE_BGNLOOP) 3207 depth++; 3208 else if (inst->op == TGSI_OPCODE_ENDLOOP) 3209 if (--depth == 0 && last == -2) 3210 last = i; 3211 assert(depth >= 0); 3212 3213 i++; 3214 } 3215 3216 assert(last >= -1); 3217 return last; 3218 } 3219 3220 /* 3221 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 3222 * channels for copy propagation and updates following instructions to 3223 * use the original versions. 3224 * 3225 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3226 * will occur. As an example, a TXP production before this pass: 3227 * 3228 * 0: MOV TEMP[1], INPUT[4].xyyy; 3229 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3230 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 3231 * 3232 * and after: 3233 * 3234 * 0: MOV TEMP[1], INPUT[4].xyyy; 3235 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3236 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3237 * 3238 * which allows for dead code elimination on TEMP[1]'s writes. 3239 */ 3240 void 3241 glsl_to_tgsi_visitor::copy_propagate(void) 3242 { 3243 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 3244 glsl_to_tgsi_instruction *, 3245 this->next_temp * 4); 3246 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3247 int level = 0; 3248 3249 foreach_iter(exec_list_iterator, iter, this->instructions) { 3250 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3251 3252 assert(inst->dst.file != PROGRAM_TEMPORARY 3253 || inst->dst.index < this->next_temp); 3254 3255 /* First, do any copy propagation possible into the src regs. 
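       * The ACP is indexed per channel: acp[4 * temp_index + chan] holds the
       * MOV (if any) that most recently wrote that channel of the temporary
       * and is still valid for propagation, e.g. the .w channel of TEMP[1]
       * is tracked at acp[4 * 1 + 3].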
*/ 3256 for (int r = 0; r < 3; r++) { 3257 glsl_to_tgsi_instruction *first = NULL; 3258 bool good = true; 3259 int acp_base = inst->src[r].index * 4; 3260 3261 if (inst->src[r].file != PROGRAM_TEMPORARY || 3262 inst->src[r].reladdr) 3263 continue; 3264 3265 /* See if we can find entries in the ACP consisting of MOVs 3266 * from the same src register for all the swizzled channels 3267 * of this src register reference. 3268 */ 3269 for (int i = 0; i < 4; i++) { 3270 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3271 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 3272 3273 if (!copy_chan) { 3274 good = false; 3275 break; 3276 } 3277 3278 assert(acp_level[acp_base + src_chan] <= level); 3279 3280 if (!first) { 3281 first = copy_chan; 3282 } else { 3283 if (first->src[0].file != copy_chan->src[0].file || 3284 first->src[0].index != copy_chan->src[0].index) { 3285 good = false; 3286 break; 3287 } 3288 } 3289 } 3290 3291 if (good) { 3292 /* We've now validated that we can copy-propagate to 3293 * replace this src register reference. Do it. 3294 */ 3295 inst->src[r].file = first->src[0].file; 3296 inst->src[r].index = first->src[0].index; 3297 3298 int swizzle = 0; 3299 for (int i = 0; i < 4; i++) { 3300 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 3301 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 3302 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 3303 (3 * i)); 3304 } 3305 inst->src[r].swizzle = swizzle; 3306 } 3307 } 3308 3309 switch (inst->op) { 3310 case TGSI_OPCODE_BGNLOOP: 3311 case TGSI_OPCODE_ENDLOOP: 3312 /* End of a basic block, clear the ACP entirely. */ 3313 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3314 break; 3315 3316 case TGSI_OPCODE_IF: 3317 ++level; 3318 break; 3319 3320 case TGSI_OPCODE_ENDIF: 3321 case TGSI_OPCODE_ELSE: 3322 /* Clear all channels written inside the block from the ACP, but 3323 * leaving those that were not touched. 3324 */ 3325 for (int r = 0; r < this->next_temp; r++) { 3326 for (int c = 0; c < 4; c++) { 3327 if (!acp[4 * r + c]) 3328 continue; 3329 3330 if (acp_level[4 * r + c] >= level) 3331 acp[4 * r + c] = NULL; 3332 } 3333 } 3334 if (inst->op == TGSI_OPCODE_ENDIF) 3335 --level; 3336 break; 3337 3338 default: 3339 /* Continuing the block, clear any written channels from 3340 * the ACP. 3341 */ 3342 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 3343 /* Any temporary might be written, so no copy propagation 3344 * across this instruction. 3345 */ 3346 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 3347 } else if (inst->dst.file == PROGRAM_OUTPUT && 3348 inst->dst.reladdr) { 3349 /* Any output might be written, so no copy propagation 3350 * from outputs across this instruction. 3351 */ 3352 for (int r = 0; r < this->next_temp; r++) { 3353 for (int c = 0; c < 4; c++) { 3354 if (!acp[4 * r + c]) 3355 continue; 3356 3357 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 3358 acp[4 * r + c] = NULL; 3359 } 3360 } 3361 } else if (inst->dst.file == PROGRAM_TEMPORARY || 3362 inst->dst.file == PROGRAM_OUTPUT) { 3363 /* Clear where it's used as dst. */ 3364 if (inst->dst.file == PROGRAM_TEMPORARY) { 3365 for (int c = 0; c < 4; c++) { 3366 if (inst->dst.writemask & (1 << c)) { 3367 acp[4 * inst->dst.index + c] = NULL; 3368 } 3369 } 3370 } 3371 3372 /* Clear where it's used as src. 
*/ 3373 for (int r = 0; r < this->next_temp; r++) { 3374 for (int c = 0; c < 4; c++) { 3375 if (!acp[4 * r + c]) 3376 continue; 3377 3378 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 3379 3380 if (acp[4 * r + c]->src[0].file == inst->dst.file && 3381 acp[4 * r + c]->src[0].index == inst->dst.index && 3382 inst->dst.writemask & (1 << src_chan)) 3383 { 3384 acp[4 * r + c] = NULL; 3385 } 3386 } 3387 } 3388 } 3389 break; 3390 } 3391 3392 /* If this is a copy, add it to the ACP. */ 3393 if (inst->op == TGSI_OPCODE_MOV && 3394 inst->dst.file == PROGRAM_TEMPORARY && 3395 !inst->dst.reladdr && 3396 !inst->saturate && 3397 !inst->src[0].reladdr && 3398 !inst->src[0].negate) { 3399 for (int i = 0; i < 4; i++) { 3400 if (inst->dst.writemask & (1 << i)) { 3401 acp[4 * inst->dst.index + i] = inst; 3402 acp_level[4 * inst->dst.index + i] = level; 3403 } 3404 } 3405 } 3406 } 3407 3408 ralloc_free(acp_level); 3409 ralloc_free(acp); 3410 } 3411 3412 /* 3413 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination. 3414 * 3415 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3416 * will occur. As an example, a TXP production after copy propagation but 3417 * before this pass: 3418 * 3419 * 0: MOV TEMP[1], INPUT[4].xyyy; 3420 * 1: MOV TEMP[1].w, INPUT[4].wwww; 3421 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3422 * 3423 * and after this pass: 3424 * 3425 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 3426 * 3427 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB) 3428 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them 3429 */ 3430 void 3431 glsl_to_tgsi_visitor::eliminate_dead_code(void) 3432 { 3433 int i; 3434 3435 for (i=0; i < this->next_temp; i++) { 3436 int last_read = get_last_temp_read(i); 3437 int j = 0; 3438 3439 foreach_iter(exec_list_iterator, iter, this->instructions) { 3440 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3441 3442 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i && 3443 j > last_read) 3444 { 3445 iter.remove(); 3446 delete inst; 3447 } 3448 3449 j++; 3450 } 3451 } 3452 } 3453 3454 /* 3455 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 3456 * code elimination. This is less primitive than eliminate_dead_code(), as it 3457 * is per-channel and can detect consecutive writes without a read between them 3458 * as dead code. However, there is some dead code that can be eliminated by 3459 * eliminate_dead_code() but not this function - for example, this function 3460 * cannot eliminate an instruction writing to a register that is never read and 3461 * is the only instruction writing to that register. 3462 * 3463 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 3464 * will occur. 
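 *
 * For example (illustrative):
 *
 * 0: MOV TEMP[0].x, CONST[0].xxxx;
 * 1: MOV TEMP[0].x, CONST[1].xxxx;
 * 2: MOV TEMP[1].x, TEMP[0].xxxx;
 *
 * Instruction 0 writes a channel that is overwritten before ever being read,
 * so its .x write is flagged dead and the instruction is removed.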
3465 */ 3466 int 3467 glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) 3468 { 3469 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 3470 glsl_to_tgsi_instruction *, 3471 this->next_temp * 4); 3472 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 3473 int level = 0; 3474 int removed = 0; 3475 3476 foreach_iter(exec_list_iterator, iter, this->instructions) { 3477 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3478 3479 assert(inst->dst.file != PROGRAM_TEMPORARY 3480 || inst->dst.index < this->next_temp); 3481 3482 switch (inst->op) { 3483 case TGSI_OPCODE_BGNLOOP: 3484 case TGSI_OPCODE_ENDLOOP: 3485 case TGSI_OPCODE_CONT: 3486 case TGSI_OPCODE_BRK: 3487 /* End of a basic block, clear the write array entirely. 3488 * 3489 * This keeps us from killing dead code when the writes are 3490 * on either side of a loop, even when the register isn't touched 3491 * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit 3492 * dead code of this type, so it shouldn't make a difference as long as 3493 * the dead code elimination pass in the GLSL compiler does its job. 3494 */ 3495 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3496 break; 3497 3498 case TGSI_OPCODE_ENDIF: 3499 case TGSI_OPCODE_ELSE: 3500 /* Promote the recorded level of all channels written inside the 3501 * preceding if or else block to the level above the if/else block. 3502 */ 3503 for (int r = 0; r < this->next_temp; r++) { 3504 for (int c = 0; c < 4; c++) { 3505 if (!writes[4 * r + c]) 3506 continue; 3507 3508 if (write_level[4 * r + c] == level) 3509 write_level[4 * r + c] = level-1; 3510 } 3511 } 3512 3513 if(inst->op == TGSI_OPCODE_ENDIF) 3514 --level; 3515 3516 break; 3517 3518 case TGSI_OPCODE_IF: 3519 ++level; 3520 /* fallthrough to default case to mark the condition as read */ 3521 3522 default: 3523 /* Continuing the block, clear any channels from the write array that 3524 * are read by this instruction. 3525 */ 3526 for (unsigned i = 0; i < Elements(inst->src); i++) { 3527 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 3528 /* Any temporary might be read, so no dead code elimination 3529 * across this instruction. 3530 */ 3531 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 3532 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 3533 /* Clear where it's used as src. */ 3534 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 3535 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 3536 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 3537 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 3538 3539 for (int c = 0; c < 4; c++) { 3540 if (src_chans & (1 << c)) { 3541 writes[4 * inst->src[i].index + c] = NULL; 3542 } 3543 } 3544 } 3545 } 3546 break; 3547 } 3548 3549 /* If this instruction writes to a temporary, add it to the write array. 3550 * If there is already an instruction in the write array for one or more 3551 * of the channels, flag that channel write as dead. 
3552 */ 3553 if (inst->dst.file == PROGRAM_TEMPORARY && 3554 !inst->dst.reladdr && 3555 !inst->saturate) { 3556 for (int c = 0; c < 4; c++) { 3557 if (inst->dst.writemask & (1 << c)) { 3558 if (writes[4 * inst->dst.index + c]) { 3559 if (write_level[4 * inst->dst.index + c] < level) 3560 continue; 3561 else 3562 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c); 3563 } 3564 writes[4 * inst->dst.index + c] = inst; 3565 write_level[4 * inst->dst.index + c] = level; 3566 } 3567 } 3568 } 3569 } 3570 3571 /* Anything still in the write array at this point is dead code. */ 3572 for (int r = 0; r < this->next_temp; r++) { 3573 for (int c = 0; c < 4; c++) { 3574 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 3575 if (inst) 3576 inst->dead_mask |= (1 << c); 3577 } 3578 } 3579 3580 /* Now actually remove the instructions that are completely dead and update 3581 * the writemask of other instructions with dead channels. 3582 */ 3583 foreach_iter(exec_list_iterator, iter, this->instructions) { 3584 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3585 3586 if (!inst->dead_mask || !inst->dst.writemask) 3587 continue; 3588 else if ((inst->dst.writemask & ~inst->dead_mask) == 0) { 3589 iter.remove(); 3590 delete inst; 3591 removed++; 3592 } else 3593 inst->dst.writemask &= ~(inst->dead_mask); 3594 } 3595 3596 ralloc_free(write_level); 3597 ralloc_free(writes); 3598 3599 return removed; 3600 } 3601 3602 /* Merges temporary registers together where possible to reduce the number of 3603 * registers needed to run a program. 3604 * 3605 * Produces optimal code only after copy propagation and dead code elimination 3606 * have been run. */ 3607 void 3608 glsl_to_tgsi_visitor::merge_registers(void) 3609 { 3610 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp); 3611 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp); 3612 int i, j; 3613 3614 /* Read the indices of the last read and first write to each temp register 3615 * into an array so that we don't have to traverse the instruction list as 3616 * much. */ 3617 for (i=0; i < this->next_temp; i++) { 3618 last_reads[i] = get_last_temp_read(i); 3619 first_writes[i] = get_first_temp_write(i); 3620 } 3621 3622 /* Start looking for registers with non-overlapping usages that can be 3623 * merged together. */ 3624 for (i=0; i < this->next_temp; i++) { 3625 /* Don't touch unused registers. */ 3626 if (last_reads[i] < 0 || first_writes[i] < 0) continue; 3627 3628 for (j=0; j < this->next_temp; j++) { 3629 /* Don't touch unused registers. */ 3630 if (last_reads[j] < 0 || first_writes[j] < 0) continue; 3631 3632 /* We can merge the two registers if the first write to j is after or 3633 * in the same instruction as the last read from i. Note that the 3634 * register at index i will always be used earlier or at the same time 3635 * as the register at index j. */ 3636 if (first_writes[i] <= first_writes[j] && 3637 last_reads[i] <= first_writes[j]) 3638 { 3639 rename_temp_register(j, i); /* Replace all references to j with i.*/ 3640 3641 /* Update the first_writes and last_reads arrays with the new 3642 * values for the merged register index, and mark the newly unused 3643 * register index as such. */ 3644 last_reads[i] = last_reads[j]; 3645 first_writes[j] = -1; 3646 last_reads[j] = -1; 3647 } 3648 } 3649 } 3650 3651 ralloc_free(last_reads); 3652 ralloc_free(first_writes); 3653 } 3654 3655 /* Reassign indices to temporary registers by reusing unused indices created 3656 * by optimization passes. 
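 * For example, if only TEMP[2] and TEMP[5] are still read after the earlier
 * passes, they are renamed to TEMP[0] and TEMP[1] and next_temp drops to 2.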
*/ 3657 void 3658 glsl_to_tgsi_visitor::renumber_registers(void) 3659 { 3660 int i = 0; 3661 int new_index = 0; 3662 3663 for (i=0; i < this->next_temp; i++) { 3664 if (get_first_temp_read(i) < 0) continue; 3665 if (i != new_index) 3666 rename_temp_register(i, new_index); 3667 new_index++; 3668 } 3669 3670 this->next_temp = new_index; 3671 } 3672 3673 /** 3674 * Returns a fragment program which implements the current pixel transfer ops. 3675 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c. 3676 */ 3677 extern "C" void 3678 get_pixel_transfer_visitor(struct st_fragment_program *fp, 3679 glsl_to_tgsi_visitor *original, 3680 int scale_and_bias, int pixel_maps) 3681 { 3682 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor(); 3683 struct st_context *st = st_context(original->ctx); 3684 struct gl_program *prog = &fp->Base.Base; 3685 struct gl_program_parameter_list *params = _mesa_new_parameter_list(); 3686 st_src_reg coord, src0; 3687 st_dst_reg dst0; 3688 glsl_to_tgsi_instruction *inst; 3689 3690 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */ 3691 v->ctx = original->ctx; 3692 v->prog = prog; 3693 v->shader_program = NULL; 3694 v->glsl_version = original->glsl_version; 3695 v->native_integers = original->native_integers; 3696 v->options = original->options; 3697 v->next_temp = original->next_temp; 3698 v->num_address_regs = original->num_address_regs; 3699 v->samplers_used = prog->SamplersUsed = original->samplers_used; 3700 v->indirect_addr_temps = original->indirect_addr_temps; 3701 v->indirect_addr_consts = original->indirect_addr_consts; 3702 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates)); 3703 v->num_immediates = original->num_immediates; 3704 3705 /* 3706 * Get initial pixel color from the texture. 
3707 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; 3708 */ 3709 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); 3710 src0 = v->get_temp(glsl_type::vec4_type); 3711 dst0 = st_dst_reg(src0); 3712 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); 3713 inst->sampler = 0; 3714 inst->tex_target = TEXTURE_2D_INDEX; 3715 3716 prog->InputsRead |= FRAG_BIT_TEX0; 3717 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */ 3718 v->samplers_used |= (1 << 0); 3719 3720 if (scale_and_bias) { 3721 static const gl_state_index scale_state[STATE_LENGTH] = 3722 { STATE_INTERNAL, STATE_PT_SCALE, 3723 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3724 static const gl_state_index bias_state[STATE_LENGTH] = 3725 { STATE_INTERNAL, STATE_PT_BIAS, 3726 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 }; 3727 GLint scale_p, bias_p; 3728 st_src_reg scale, bias; 3729 3730 scale_p = _mesa_add_state_reference(params, scale_state); 3731 bias_p = _mesa_add_state_reference(params, bias_state); 3732 3733 /* MAD colorTemp, colorTemp, scale, bias; */ 3734 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT); 3735 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT); 3736 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias); 3737 } 3738 3739 if (pixel_maps) { 3740 st_src_reg temp = v->get_temp(glsl_type::vec4_type); 3741 st_dst_reg temp_dst = st_dst_reg(temp); 3742 3743 assert(st->pixel_xfer.pixelmap_texture); 3744 3745 /* With a little effort, we can do four pixel map look-ups with 3746 * two TEX instructions: 3747 */ 3748 3749 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */ 3750 temp_dst.writemask = WRITEMASK_XY; /* write R,G */ 3751 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3752 inst->sampler = 1; 3753 inst->tex_target = TEXTURE_2D_INDEX; 3754 3755 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */ 3756 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); 3757 temp_dst.writemask = WRITEMASK_ZW; /* write B,A */ 3758 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0); 3759 inst->sampler = 1; 3760 inst->tex_target = TEXTURE_2D_INDEX; 3761 3762 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */ 3763 v->samplers_used |= (1 << 1); 3764 3765 /* MOV colorTemp, temp; */ 3766 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp); 3767 } 3768 3769 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the 3770 * new visitor. */ 3771 foreach_iter(exec_list_iterator, iter, original->instructions) { 3772 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); 3773 glsl_to_tgsi_instruction *newinst; 3774 st_src_reg src_regs[3]; 3775 3776 if (inst->dst.file == PROGRAM_OUTPUT) 3777 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index); 3778 3779 for (int i=0; i<3; i++) { 3780 src_regs[i] = inst->src[i]; 3781 if (src_regs[i].file == PROGRAM_INPUT && 3782 src_regs[i].index == FRAG_ATTRIB_COL0) 3783 { 3784 src_regs[i].file = PROGRAM_TEMPORARY; 3785 src_regs[i].index = src0.index; 3786 } 3787 else if (src_regs[i].file == PROGRAM_INPUT) 3788 prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index); 3789 } 3790 3791 newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]); 3792 newinst->tex_target = inst->tex_target; 3793 } 3794 3795 /* Make modifications to fragment program info. 

   /* Make modifications to fragment program info. */
   prog->Parameters = _mesa_combine_parameter_lists(params,
                                                    original->prog->Parameters);
   _mesa_free_parameter_list(params);
   count_resources(v, prog);
   fp->glsl_to_tgsi = v;
}

/**
 * Make fragment program for glBitmap:
 * Sample the texture and kill the fragment if the bit is 0.
 * This program will be combined with the user's fragment program.
 *
 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
 */
extern "C" void
get_bitmap_visitor(struct st_fragment_program *fp,
                   glsl_to_tgsi_visitor *original, int samplerIndex)
{
   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
   struct st_context *st = st_context(original->ctx);
   struct gl_program *prog = &fp->Base.Base;
   st_src_reg coord, src0;
   st_dst_reg dst0;
   glsl_to_tgsi_instruction *inst;

   /* Copy attributes from the original shader's glsl_to_tgsi_visitor. */
   v->ctx = original->ctx;
   v->prog = prog;
   v->shader_program = NULL;
   v->glsl_version = original->glsl_version;
   v->native_integers = original->native_integers;
   v->options = original->options;
   v->next_temp = original->next_temp;
   v->num_address_regs = original->num_address_regs;
   v->samplers_used = prog->SamplersUsed = original->samplers_used;
   v->indirect_addr_temps = original->indirect_addr_temps;
   v->indirect_addr_consts = original->indirect_addr_consts;
   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
   v->num_immediates = original->num_immediates;

   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
   coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
   src0 = v->get_temp(glsl_type::vec4_type);
   dst0 = st_dst_reg(src0);
   inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
   inst->sampler = samplerIndex;
   inst->tex_target = TEXTURE_2D_INDEX;

   prog->InputsRead |= FRAG_BIT_TEX0;
   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
   v->samplers_used |= (1 << samplerIndex);

   /* KIL if -tmp0 < 0 # texel=0 -> keep / texel!=0 -> discard */
   src0.negate = NEGATE_XYZW;
   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
      src0.swizzle = SWIZZLE_XXXX;
   inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);

   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
    * new visitor. */
   foreach_iter(exec_list_iterator, iter, original->instructions) {
      glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
      glsl_to_tgsi_instruction *newinst;
      st_src_reg src_regs[3];

      if (inst->dst.file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);

      for (int i = 0; i < 3; i++) {
         src_regs[i] = inst->src[i];
         if (src_regs[i].file == PROGRAM_INPUT)
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

      newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
      newinst->tex_target = inst->tex_target;
   }

   /* Make modifications to fragment program info. */
   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
   count_resources(v, prog);
   fp->glsl_to_tgsi = v;
}

/* ------------------------- TGSI conversion stuff -------------------------- */
struct label {
   unsigned branch_target;
   unsigned token;
};

/**
 * Intermediate state used during shader translation.
 */
struct st_translate {
   struct ureg_program *ureg;

   struct ureg_dst temps[MAX_TEMPS];
   struct ureg_src *constants;
   struct ureg_src *immediates;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   struct ureg_dst address[1];
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];

   const GLuint *inputMapping;
   const GLuint *outputMapping;

   /* For every instruction that contains a label (e.g. CALL), keep
    * details so that we can go back afterwards and emit the correct
    * tgsi instruction number for each label.
    */
   struct label *labels;
   unsigned labels_size;
   unsigned labels_count;

   /* Keep a record of the tgsi instruction number that each mesa
    * instruction starts at, will be used to fix up labels after
    * translation.
    */
   unsigned *insn;
   unsigned insn_size;
   unsigned insn_count;

   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */

   boolean error;
};

/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
   TGSI_SEMANTIC_FACE,
   TGSI_SEMANTIC_VERTEXID,
   TGSI_SEMANTIC_INSTANCEID
};

/**
 * Make note of a branch to a label in the TGSI code.
 * After we've emitted all instructions, we'll go over the list
 * of labels built here and patch the TGSI code with the actual
 * location of each label.
 */
static unsigned *get_label(struct st_translate *t, unsigned branch_target)
{
   unsigned i;

   if (t->labels_count + 1 >= t->labels_size) {
      t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
      t->labels = (struct label *)realloc(t->labels,
                                          t->labels_size * sizeof(struct label));
      if (t->labels == NULL) {
         static unsigned dummy;
         t->error = TRUE;
         return &dummy;
      }
   }

   i = t->labels_count++;
   t->labels[i].branch_target = branch_target;
   return &t->labels[i].token;
}

/**
 * Called prior to emitting the TGSI code for each instruction.
 * Allocate additional space for instructions if needed.
 * Update the insn[] array so the next glsl_to_tgsi_instruction points to
 * the next TGSI instruction.
 */
static void set_insn_start(struct st_translate *t, unsigned start)
{
   if (t->insn_count + 1 >= t->insn_size) {
      t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
      t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
      if (t->insn == NULL) {
         t->error = TRUE;
         return;
      }
   }

   t->insn[t->insn_count++] = start;
}
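
/* Rough sketch of how these two helpers fit together (this mirrors the code
 * in st_translate_program() further below; it is not additional machinery):
 *
 *    set_insn_start(t, ureg_get_instruction_number(ureg));  // once per instruction
 *    ...
 *    ureg_label_insn(ureg, TGSI_OPCODE_IF, src, num_src,
 *                    get_label(t, branch_target));          // placeholder token
 *    ...
 *    ureg_fixup_label(ureg, t->labels[i].token,
 *                     t->insn[t->labels[i].branch_target]); // patched at the end
 */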

/**
 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
 */
static struct ureg_src
emit_immediate(struct st_translate *t,
               gl_constant_value values[4],
               int type, int size)
{
   struct ureg_program *ureg = t->ureg;

   switch(type)
   {
   case GL_FLOAT:
      return ureg_DECL_immediate(ureg, &values[0].f, size);
   case GL_INT:
      return ureg_DECL_immediate_int(ureg, &values[0].i, size);
   case GL_UNSIGNED_INT:
   case GL_BOOL:
      return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
   default:
      assert(!"should not get here - type must be float, int, uint, or bool");
      return ureg_src_undef();
   }
}

/**
 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
 */
static struct ureg_dst
dst_register(struct st_translate *t,
             gl_register_file file,
             GLuint index)
{
   switch(file) {
   case PROGRAM_UNDEFINED:
      return ureg_dst_undef();

   case PROGRAM_TEMPORARY:
      if (ureg_dst_is_undef(t->temps[index]))
         t->temps[index] = ureg_DECL_local_temporary(t->ureg);

      return t->temps[index];

   case PROGRAM_OUTPUT:
      if (t->procType == TGSI_PROCESSOR_VERTEX)
         assert(index < VERT_RESULT_MAX);
      else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
         assert(index < FRAG_RESULT_MAX);
      else
         assert(index < GEOM_RESULT_MAX);

      assert(t->outputMapping[index] < Elements(t->outputs));

      return t->outputs[t->outputMapping[index]];

   case PROGRAM_ADDRESS:
      return t->address[index];

   default:
      assert(!"unknown dst register file");
      return ureg_dst_undef();
   }
}

/**
 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
 */
static struct ureg_src
src_register(struct st_translate *t,
             gl_register_file file,
             GLint index)
{
   switch(file) {
   case PROGRAM_UNDEFINED:
      return ureg_src_undef();

   case PROGRAM_TEMPORARY:
      assert(index >= 0);
      assert(index < (int) Elements(t->temps));
      if (ureg_dst_is_undef(t->temps[index]))
         t->temps[index] = ureg_DECL_local_temporary(t->ureg);
      return ureg_src(t->temps[index]);

   case PROGRAM_NAMED_PARAM:
   case PROGRAM_ENV_PARAM:
   case PROGRAM_LOCAL_PARAM:
   case PROGRAM_UNIFORM:
      assert(index >= 0);
      return t->constants[index];
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT:       /* i.e., immediate */
      if (index < 0)
         return ureg_DECL_constant(t->ureg, 0);
      else
         return t->constants[index];

   case PROGRAM_IMMEDIATE:
      return t->immediates[index];

   case PROGRAM_INPUT:
      assert(t->inputMapping[index] < Elements(t->inputs));
      return t->inputs[t->inputMapping[index]];

   case PROGRAM_OUTPUT:
      assert(t->outputMapping[index] < Elements(t->outputs));
      return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */

   case PROGRAM_ADDRESS:
      return ureg_src(t->address[index]);

   case PROGRAM_SYSTEM_VALUE:
      assert(index < (int) Elements(t->systemValues));
      return t->systemValues[index];

   default:
      assert(!"unknown src register file");
      return ureg_src_undef();
   }
}

/**
 * Create a TGSI ureg_dst register from an st_dst_reg.
 */
static struct ureg_dst
translate_dst(struct st_translate *t,
              const st_dst_reg *dst_reg,
              bool saturate, bool clamp_color)
{
   struct ureg_dst dst = dst_register(t,
                                      dst_reg->file,
                                      dst_reg->index);

   dst = ureg_writemask(dst, dst_reg->writemask);

   if (saturate)
      dst = ureg_saturate(dst);
   else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
      /* Clamp colors for ARB_color_buffer_float. */
      switch (t->procType) {
      case TGSI_PROCESSOR_VERTEX:
         /* XXX if the geometry shader is present, this must be done there
          * instead of here. */
         if (dst_reg->index == VERT_RESULT_COL0 ||
             dst_reg->index == VERT_RESULT_COL1 ||
             dst_reg->index == VERT_RESULT_BFC0 ||
             dst_reg->index == VERT_RESULT_BFC1) {
            dst = ureg_saturate(dst);
         }
         break;

      case TGSI_PROCESSOR_FRAGMENT:
         if (dst_reg->index >= FRAG_RESULT_COLOR) {
            dst = ureg_saturate(dst);
         }
         break;
      }
   }

   if (dst_reg->reladdr != NULL)
      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));

   return dst;
}

/**
 * Create a TGSI ureg_src register from an st_src_reg.
 */
4177 */ 4178 src.Index = src_reg->index; 4179 } 4180 } 4181 4182 return src; 4183 } 4184 4185 static struct tgsi_texture_offset 4186 translate_tex_offset(struct st_translate *t, 4187 const struct tgsi_texture_offset *in_offset) 4188 { 4189 struct tgsi_texture_offset offset; 4190 4191 assert(in_offset->File == PROGRAM_IMMEDIATE); 4192 4193 offset.File = TGSI_FILE_IMMEDIATE; 4194 offset.Index = in_offset->Index; 4195 offset.SwizzleX = in_offset->SwizzleX; 4196 offset.SwizzleY = in_offset->SwizzleY; 4197 offset.SwizzleZ = in_offset->SwizzleZ; 4198 offset.Padding = 0; 4199 4200 return offset; 4201 } 4202 4203 static void 4204 compile_tgsi_instruction(struct st_translate *t, 4205 const glsl_to_tgsi_instruction *inst, 4206 bool clamp_dst_color_output) 4207 { 4208 struct ureg_program *ureg = t->ureg; 4209 GLuint i; 4210 struct ureg_dst dst[1]; 4211 struct ureg_src src[4]; 4212 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; 4213 4214 unsigned num_dst; 4215 unsigned num_src; 4216 4217 num_dst = num_inst_dst_regs(inst->op); 4218 num_src = num_inst_src_regs(inst->op); 4219 4220 if (num_dst) 4221 dst[0] = translate_dst(t, 4222 &inst->dst, 4223 inst->saturate, 4224 clamp_dst_color_output); 4225 4226 for (i = 0; i < num_src; i++) 4227 src[i] = translate_src(t, &inst->src[i]); 4228 4229 switch(inst->op) { 4230 case TGSI_OPCODE_BGNLOOP: 4231 case TGSI_OPCODE_CAL: 4232 case TGSI_OPCODE_ELSE: 4233 case TGSI_OPCODE_ENDLOOP: 4234 case TGSI_OPCODE_IF: 4235 assert(num_dst == 0); 4236 ureg_label_insn(ureg, 4237 inst->op, 4238 src, num_src, 4239 get_label(t, 4240 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0)); 4241 return; 4242 4243 case TGSI_OPCODE_TEX: 4244 case TGSI_OPCODE_TXB: 4245 case TGSI_OPCODE_TXD: 4246 case TGSI_OPCODE_TXL: 4247 case TGSI_OPCODE_TXP: 4248 case TGSI_OPCODE_TXQ: 4249 case TGSI_OPCODE_TXF: 4250 src[num_src++] = t->samplers[inst->sampler]; 4251 for (i = 0; i < inst->tex_offset_num_offset; i++) { 4252 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); 4253 } 4254 ureg_tex_insn(ureg, 4255 inst->op, 4256 dst, num_dst, 4257 st_translate_texture_target(inst->tex_target, inst->tex_shadow), 4258 texoffsets, inst->tex_offset_num_offset, 4259 src, num_src); 4260 return; 4261 4262 case TGSI_OPCODE_SCS: 4263 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY); 4264 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src); 4265 break; 4266 4267 default: 4268 ureg_insn(ureg, 4269 inst->op, 4270 dst, num_dst, 4271 src, num_src); 4272 break; 4273 } 4274 } 4275 4276 /** 4277 * Emit the TGSI instructions for inverting and adjusting WPOS. 4278 * This code is unavoidable because it also depends on whether 4279 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 4280 */ 4281 static void 4282 emit_wpos_adjustment( struct st_translate *t, 4283 const struct gl_program *program, 4284 boolean invert, 4285 GLfloat adjX, GLfloat adjY[2]) 4286 { 4287 struct ureg_program *ureg = t->ureg; 4288 4289 /* Fragment program uses fragment position input. 4290 * Need to replace instances of INPUT[WPOS] with temp T 4291 * where T = INPUT[WPOS] by y is inverted. 4292 */ 4293 static const gl_state_index wposTransformState[STATE_LENGTH] 4294 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 4295 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 }; 4296 4297 /* XXX: note we are modifying the incoming shader here! 

static struct tgsi_texture_offset
translate_tex_offset(struct st_translate *t,
                     const struct tgsi_texture_offset *in_offset)
{
   struct tgsi_texture_offset offset;

   assert(in_offset->File == PROGRAM_IMMEDIATE);

   offset.File = TGSI_FILE_IMMEDIATE;
   offset.Index = in_offset->Index;
   offset.SwizzleX = in_offset->SwizzleX;
   offset.SwizzleY = in_offset->SwizzleY;
   offset.SwizzleZ = in_offset->SwizzleZ;
   offset.Padding = 0;

   return offset;
}

static void
compile_tgsi_instruction(struct st_translate *t,
                         const glsl_to_tgsi_instruction *inst,
                         bool clamp_dst_color_output)
{
   struct ureg_program *ureg = t->ureg;
   GLuint i;
   struct ureg_dst dst[1];
   struct ureg_src src[4];
   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];

   unsigned num_dst;
   unsigned num_src;

   num_dst = num_inst_dst_regs(inst->op);
   num_src = num_inst_src_regs(inst->op);

   if (num_dst)
      dst[0] = translate_dst(t,
                             &inst->dst,
                             inst->saturate,
                             clamp_dst_color_output);

   for (i = 0; i < num_src; i++)
      src[i] = translate_src(t, &inst->src[i]);

   switch(inst->op) {
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_CAL:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_IF:
      assert(num_dst == 0);
      ureg_label_insn(ureg,
                      inst->op,
                      src, num_src,
                      get_label(t,
                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
      return;

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXF:
      src[num_src++] = t->samplers[inst->sampler];
      for (i = 0; i < inst->tex_offset_num_offset; i++) {
         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
      }
      ureg_tex_insn(ureg,
                    inst->op,
                    dst, num_dst,
                    st_translate_texture_target(inst->tex_target, inst->tex_shadow),
                    texoffsets, inst->tex_offset_num_offset,
                    src, num_src);
      return;

   case TGSI_OPCODE_SCS:
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
      break;

   default:
      ureg_insn(ureg,
                inst->op,
                dst, num_dst,
                src, num_src);
      break;
   }
}

/**
 * Emit the TGSI instructions for inverting and adjusting WPOS.
 * This code is unavoidable because it also depends on whether
 * an FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
 */
static void
emit_wpos_adjustment( struct st_translate *t,
                      const struct gl_program *program,
                      boolean invert,
                      GLfloat adjX, GLfloat adjY[2])
{
   struct ureg_program *ureg = t->ureg;

   /* Fragment program uses fragment position input.
    * Need to replace instances of INPUT[WPOS] with temp T
    * where T = INPUT[WPOS] but y is inverted.
    */
   static const gl_state_index wposTransformState[STATE_LENGTH]
      = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
          (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };

   /* XXX: note we are modifying the incoming shader here!  Need to
    * do this before emitting the constant decls below, or this
    * will be missed:
    */
   unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
                                                       wposTransformState);

   struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
   struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];

   /* First, apply the coordinate shift: */
   if (adjX || adjY[0] || adjY[1]) {
      if (adjY[0] != adjY[1]) {
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
          * depending on whether inversion is actually going to be applied
          * or not, which is determined by testing against the inversion
          * state variable used below, which will be either +1 or -1.
          */
         struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);

         ureg_CMP(ureg, adj_temp,
                  ureg_scalar(wpostrans, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
      } else {
         ureg_ADD(ureg, wpos_temp, wpos_input,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      }
      wpos_input = ureg_src(wpos_temp);
   } else {
      /* MOV wpos_temp, input[wpos]
       */
      ureg_MOV( ureg, wpos_temp, wpos_input );
   }

   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    * inversion/identity, or the other way around if we're drawing to an FBO.
    */
   if (invert) {
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 0),
                ureg_scalar(wpostrans, 1));
   } else {
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 2),
                ureg_scalar(wpostrans, 3));
   }

   /* Use wpos_temp as position input from here on:
    */
   t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
}


/**
 * Emit fragment position/coordinate code.
 */
static void
emit_wpos(struct st_context *st,
          struct st_translate *t,
          const struct gl_program *program,
          struct ureg_program *ureg)
{
   const struct gl_fragment_program *fp =
      (const struct gl_fragment_program *) program;
   struct pipe_screen *pscreen = st->pipe->screen;
   GLfloat adjX = 0.0f;
   GLfloat adjY[2] = { 0.0f, 0.0f };
   boolean invert = FALSE;

   /* Query the pixel center conventions supported by the pipe driver and set
    * adjX, adjY to help out if it cannot handle the requested one internally.
    *
    * The bias of the y-coordinate depends on whether y-inversion takes place
    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
    * drawing to an FBO (causes additional inversion), and whether the pipe
    * driver origin and the requested origin differ (the latter condition is
    * stored in the 'invert' variable).
    *
    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
    *
    * center shift only:
    * i -> h: +0.5
    * h -> i: -0.5
    *
    * inversion only:
    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
    *
    * inversion and center shift:
    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
    */
   if (fp->OriginUpperLeft) {
      /* Fragment shader wants origin in upper-left */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
         /* the driver supports upper-left origin */
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
         /* the driver supports lower-left origin, need to invert Y */
         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
         invert = TRUE;
      }
      else
         assert(0);
   }
   else {
      /* Fragment shader wants origin in lower-left */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
         /* the driver supports lower-left origin */
         ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
         /* the driver supports upper-left origin, need to invert Y */
         invert = TRUE;
      else
         assert(0);
   }

   if (fp->PixelCenterInteger) {
      /* Fragment shader wants pixel center integer */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
         /* the driver supports pixel center integer */
         adjY[1] = 1.0f;
         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
         /* the driver supports pixel center half integer, need to bias X,Y */
         adjX = -0.5f;
         adjY[0] = -0.5f;
         adjY[1] = 0.5f;
      }
      else
         assert(0);
   }
   else {
      /* Fragment shader wants pixel center half integer */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
         /* the driver supports pixel center half integer */
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
         /* the driver supports pixel center integer, need to bias X,Y */
         adjX = adjY[0] = adjY[1] = 0.5f;
         ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
      }
      else
         assert(0);
   }

   /* we invert after adjustment so that we avoid the MOV to temporary,
    * and reuse the adjustment ADD instead */
   emit_wpos_adjustment(t, program, invert, adjX, adjY);
}
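
/* A concrete example (hypothetical driver capabilities): if the fragment
 * shader declares origin_upper_left and pixel_center_integer but the driver
 * only supports a lower-left origin with half-integer centers, the code above
 * ends up with invert = TRUE, adjX = -0.5, adjY[0] = -0.5 and adjY[1] = +0.5,
 * and emit_wpos_adjustment() then emits the CMP/ADD plus MAD sequence that
 * shifts and flips WPOS accordingly.
 */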

/**
 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
 * TGSI uses +1 for front, -1 for back.
 * This function converts the TGSI value to the GL value.  Simply clamping/
 * saturating the value to [0,1] does the job.
 */
static void
emit_face_var(struct st_translate *t)
{
   struct ureg_program *ureg = t->ureg;
   struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
   struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];

   /* MOV_SAT face_temp, input[face] */
   face_temp = ureg_saturate(face_temp);
   ureg_MOV(ureg, face_temp, face_input);

   /* Use face_temp as face input from here on: */
   t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
}
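
/* Worked values: a front-facing fragment arrives as +1.0 and saturates to 1.0,
 * a back-facing fragment arrives as -1.0 and saturates to 0.0, which is the
 * 1/0 convention gl_FrontFace expects.
 */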

static void
emit_edgeflags(struct st_translate *t)
{
   struct ureg_program *ureg = t->ureg;
   struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
   struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];

   ureg_MOV(ureg, edge_dst, edge_src);
}

/**
 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
 * \param program  the program to translate
 * \param numInputs  number of input registers used
 * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
 *                      input indexes
 * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
 * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
 *                            each input
 * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
 * \param numOutputs  number of output registers used
 * \param outputMapping  maps Mesa fragment program outputs to TGSI
 *                       generic outputs
 * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
 * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
 *                             each output
 *
 * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
 */
extern "C" enum pipe_error
st_translate_program(
   struct gl_context *ctx,
   uint procType,
   struct ureg_program *ureg,
   glsl_to_tgsi_visitor *program,
   const struct gl_program *proginfo,
   GLuint numInputs,
   const GLuint inputMapping[],
   const ubyte inputSemanticName[],
   const ubyte inputSemanticIndex[],
   const GLuint interpMode[],
   const GLboolean is_centroid[],
   GLuint numOutputs,
   const GLuint outputMapping[],
   const ubyte outputSemanticName[],
   const ubyte outputSemanticIndex[],
   boolean passthrough_edgeflags,
   boolean clamp_color)
{
   struct st_translate *t;
   unsigned i;
   enum pipe_error ret = PIPE_OK;

   assert(numInputs <= Elements(t->inputs));
   assert(numOutputs <= Elements(t->outputs));

   t = CALLOC_STRUCT(st_translate);
   if (!t) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto out;
   }

   memset(t, 0, sizeof *t);

   t->procType = procType;
   t->inputMapping = inputMapping;
   t->outputMapping = outputMapping;
   t->ureg = ureg;

   if (program->shader_program) {
      for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
         struct gl_uniform_storage *const storage =
            &program->shader_program->UniformStorage[i];

         _mesa_uniform_detach_all_driver_storage(storage);
      }
   }

   /*
    * Declare input attributes.
    */
   if (procType == TGSI_PROCESSOR_FRAGMENT) {
      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
                                                        inputSemanticName[i],
                                                        inputSemanticIndex[i],
                                                        interpMode[i], 0,
                                                        is_centroid[i]);
      }

      if (proginfo->InputsRead & FRAG_BIT_WPOS) {
         /* Must do this after setting up t->inputs, and before
          * emitting constant references, below:
          */
         emit_wpos(st_context(ctx), t, proginfo, ureg);
      }

      if (proginfo->InputsRead & FRAG_BIT_FACE)
         emit_face_var(t);

      /*
       * Declare output attributes.
       */
      for (i = 0; i < numOutputs; i++) {
         switch (outputSemanticName[i]) {
         case TGSI_SEMANTIC_POSITION:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_POSITION, /* Z/Depth */
                                             outputSemanticIndex[i]);
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
            break;
         case TGSI_SEMANTIC_STENCIL:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_STENCIL, /* Stencil */
                                             outputSemanticIndex[i]);
            t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
            break;
         case TGSI_SEMANTIC_COLOR:
            t->outputs[i] = ureg_DECL_output(ureg,
                                             TGSI_SEMANTIC_COLOR,
                                             outputSemanticIndex[i]);
            break;
         default:
            assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
            ret = PIPE_ERROR_BAD_INPUT;
            goto out;
         }
      }
   }
   else if (procType == TGSI_PROCESSOR_GEOMETRY) {
      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_gs_input(ureg,
                                           i,
                                           inputSemanticName[i],
                                           inputSemanticIndex[i]);
      }

      for (i = 0; i < numOutputs; i++) {
         t->outputs[i] = ureg_DECL_output(ureg,
                                          outputSemanticName[i],
                                          outputSemanticIndex[i]);
      }
   }
   else {
      assert(procType == TGSI_PROCESSOR_VERTEX);

      for (i = 0; i < numInputs; i++) {
         t->inputs[i] = ureg_DECL_vs_input(ureg, i);
      }

      for (i = 0; i < numOutputs; i++) {
         t->outputs[i] = ureg_DECL_output(ureg,
                                          outputSemanticName[i],
                                          outputSemanticIndex[i]);
      }
      if (passthrough_edgeflags)
         emit_edgeflags(t);
   }

   /* Declare address register.
    */
   if (program->num_address_regs > 0) {
      assert(program->num_address_regs == 1);
      t->address[0] = ureg_DECL_address(ureg);
   }

   /* Declare misc input registers
    */
   {
      GLbitfield sysInputs = proginfo->SystemValuesRead;
      unsigned numSys = 0;
      for (i = 0; sysInputs; i++) {
         if (sysInputs & (1 << i)) {
            unsigned semName = mesa_sysval_to_semantic[i];
            t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
            if (semName == TGSI_SEMANTIC_INSTANCEID ||
                semName == TGSI_SEMANTIC_VERTEXID) {
               /* From Gallium perspective, these system values are always
                * integer, and require native integer support.  However, if
                * native integer is supported on the vertex stage but not the
                * pixel stage (e.g., i915g + draw), Mesa will generate IR that
                * assumes these system values are floats.  To resolve the
                * inconsistency, we insert a U2F.
                */
               struct st_context *st = st_context(ctx);
               struct pipe_screen *pscreen = st->pipe->screen;
               assert(procType == TGSI_PROCESSOR_VERTEX);
               assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
               if (!ctx->Const.NativeIntegers) {
                  struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
                  ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
                  t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
               }
            }
            numSys++;
            sysInputs &= ~(1 << i);
         }
      }
   }

   if (program->indirect_addr_temps) {
      /* If temps are accessed with indirect addressing, declare temporaries
       * in sequential order.  Else, we declare them on demand elsewhere.
       * (Note: the number of temporaries is equal to program->next_temp)
       */
      for (i = 0; i < (unsigned)program->next_temp; i++) {
         /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
         t->temps[i] = ureg_DECL_local_temporary(t->ureg);
      }
   }

   /* Emit constants and uniforms.  TGSI uses a single index space for these,
    * so we put all the translated regs in t->constants.
    */
   if (proginfo->Parameters) {
      t->constants = (struct ureg_src *)
         CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
      if (t->constants == NULL) {
         ret = PIPE_ERROR_OUT_OF_MEMORY;
         goto out;
      }

      for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
         switch (proginfo->Parameters->Parameters[i].Type) {
         case PROGRAM_ENV_PARAM:
         case PROGRAM_LOCAL_PARAM:
         case PROGRAM_STATE_VAR:
         case PROGRAM_NAMED_PARAM:
         case PROGRAM_UNIFORM:
            t->constants[i] = ureg_DECL_constant(ureg, i);
            break;

         /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
          * addressing of the const buffer.
          * FIXME: Be smarter and recognize param arrays:
          * indirect addressing is only valid within the referenced
          * array.
          */
         case PROGRAM_CONSTANT:
            if (program->indirect_addr_consts)
               t->constants[i] = ureg_DECL_constant(ureg, i);
            else
               t->constants[i] = emit_immediate(t,
                                                proginfo->Parameters->ParameterValues[i],
                                                proginfo->Parameters->Parameters[i].DataType,
                                                4);
            break;
         default:
            break;
         }
      }
   }
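
   /* Example of the PROGRAM_CONSTANT split above (hypothetical shader): a
    * literal vec4(0.5) normally becomes a TGSI immediate, but if the shader
    * also indexes the constant buffer indirectly (indirect_addr_consts is
    * set), the literal is declared as an ordinary constant instead, so that
    * relative addressing stays valid across the whole parameter list.
    */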

   /* Emit immediate values.
    */
   t->immediates = (struct ureg_src *)
      CALLOC(program->num_immediates * sizeof(struct ureg_src));
   if (t->immediates == NULL) {
      ret = PIPE_ERROR_OUT_OF_MEMORY;
      goto out;
   }
   i = 0;
   foreach_iter(exec_list_iterator, iter, program->immediates) {
      immediate_storage *imm = (immediate_storage *)iter.get();
      assert(i < program->num_immediates);
      t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
   }
   assert(i == program->num_immediates);

   /* texture samplers */
   for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
      if (program->samplers_used & (1 << i)) {
         t->samplers[i] = ureg_DECL_sampler(ureg, i);
      }
   }

   /* Emit each instruction in turn:
    */
   foreach_iter(exec_list_iterator, iter, program->instructions) {
      set_insn_start(t, ureg_get_instruction_number(ureg));
      compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
                               clamp_color);
   }

   /* Fix up all emitted labels:
    */
   for (i = 0; i < t->labels_count; i++) {
      ureg_fixup_label(ureg, t->labels[i].token,
                       t->insn[t->labels[i].branch_target]);
   }

   if (program->shader_program) {
      /* This has to be done last.  Any operation that can cause
       * prog->ParameterValues to get reallocated (e.g., anything that adds a
       * program constant) has to happen before creating this linkage.
       */
      for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
         if (program->shader_program->_LinkedShaders[i] == NULL)
            continue;

         _mesa_associate_uniform_storage(ctx, program->shader_program,
                                         program->shader_program->_LinkedShaders[i]->Program->Parameters);
      }
   }

out:
   if (t) {
      FREE(t->insn);
      FREE(t->labels);
      FREE(t->constants);
      FREE(t->immediates);

      if (t->error) {
         debug_printf("%s: translate error flag set\n", __FUNCTION__);
      }

      FREE(t);
   }

   return ret;
}
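
/* Recap of st_translate_program() above: every register file (inputs, outputs,
 * the address register, system values, indirectly addressed temporaries,
 * constants/uniforms, immediates, samplers) is declared and cached in the
 * st_translate struct before the instruction loop, so translate_src() and
 * translate_dst() only ever look values up; label fixup and uniform-storage
 * association run last, once t->insn[] is complete.
 */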
/* ----------------------------- End TGSI code ------------------------------ */

/**
 * Convert a shader's GLSL IR into a Mesa gl_program, although without
 * generating Mesa IR.
 */
static struct gl_program *
get_mesa_program(struct gl_context *ctx,
                 struct gl_shader_program *shader_program,
                 struct gl_shader *shader)
{
   glsl_to_tgsi_visitor* v;
   struct gl_program *prog;
   GLenum target;
   const char *target_string;
   bool progress;
   struct gl_shader_compiler_options *options =
      &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];

   switch (shader->Type) {
   case GL_VERTEX_SHADER:
      target = GL_VERTEX_PROGRAM_ARB;
      target_string = "vertex";
      break;
   case GL_FRAGMENT_SHADER:
      target = GL_FRAGMENT_PROGRAM_ARB;
      target_string = "fragment";
      break;
   case GL_GEOMETRY_SHADER:
      target = GL_GEOMETRY_PROGRAM_NV;
      target_string = "geometry";
      break;
   default:
      assert(!"should not be reached");
      return NULL;
   }

   validate_ir_tree(shader->ir);

   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
   if (!prog)
      return NULL;
   prog->Parameters = _mesa_new_parameter_list();
   v = new glsl_to_tgsi_visitor();
   v->ctx = ctx;
   v->prog = prog;
   v->shader_program = shader_program;
   v->options = options;
   v->glsl_version = ctx->Const.GLSLVersion;
   v->native_integers = ctx->Const.NativeIntegers;

   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
                                               prog->Parameters);

   /* Remove reads from output registers. */
   lower_output_reads(shader->ir);

   /* Emit intermediate IR for main(). */
   visit_exec_list(shader->ir, v);

   /* Now emit bodies for any functions that were used. */
   do {
      progress = GL_FALSE;

      foreach_iter(exec_list_iterator, iter, v->function_signatures) {
         function_entry *entry = (function_entry *)iter.get();

         if (!entry->bgn_inst) {
            v->current_function = entry;

            entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
            entry->bgn_inst->function = entry;

            visit_exec_list(&entry->sig->body, v);

            glsl_to_tgsi_instruction *last;
            last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
            if (last->op != TGSI_OPCODE_RET)
               v->emit(NULL, TGSI_OPCODE_RET);

            glsl_to_tgsi_instruction *end;
            end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
            end->function = entry;

            progress = GL_TRUE;
         }
      }
   } while (progress);

#if 0
   /* Print out some information (for debugging purposes) used by the
    * optimization passes. */
   for (i = 0; i < v->next_temp; i++) {
      int fr = v->get_first_temp_read(i);
      int fw = v->get_first_temp_write(i);
      int lr = v->get_last_temp_read(i);
      int lw = v->get_last_temp_write(i);

      printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
      assert(fw <= fr);
   }
#endif

   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
   v->simplify_cmp();
   v->copy_propagate();
   while (v->eliminate_dead_code_advanced());

   /* FIXME: These passes to optimize temporary registers don't work when there
    * is indirect addressing of the temporary register space.  We need proper
    * array support so that we don't have to give up these passes in every
    * shader that uses arrays.
    */
   if (!v->indirect_addr_temps) {
      v->eliminate_dead_code();
      v->merge_registers();
      v->renumber_registers();
   }

   /* Write the END instruction. */
   v->emit(NULL, TGSI_OPCODE_END);

   if (ctx->Shader.Flags & GLSL_DUMP) {
      printf("\n");
      printf("GLSL IR for linked %s program %d:\n", target_string,
             shader_program->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
      printf("\n");
      fflush(stdout);
   }

   prog->Instructions = NULL;
   prog->NumInstructions = 0;

   do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
   count_resources(v, prog);

   _mesa_reference_program(ctx, &shader->Program, prog);

   /* This has to be done last.  Any operation that can cause
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
   if (!shader_program->LinkStatus) {
      return NULL;
   }

   struct st_vertex_program *stvp;
   struct st_fragment_program *stfp;
   struct st_geometry_program *stgp;

   switch (shader->Type) {
   case GL_VERTEX_SHADER:
      stvp = (struct st_vertex_program *)prog;
      stvp->glsl_to_tgsi = v;
      break;
   case GL_FRAGMENT_SHADER:
      stfp = (struct st_fragment_program *)prog;
      stfp->glsl_to_tgsi = v;
      break;
   case GL_GEOMETRY_SHADER:
      stgp = (struct st_geometry_program *)prog;
      stgp->glsl_to_tgsi = v;
      break;
   default:
      assert(!"should not be reached");
      return NULL;
   }

   return prog;
}

extern "C" {

struct gl_shader *
st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
{
   struct gl_shader *shader;
   assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
          type == GL_GEOMETRY_SHADER_ARB);
   shader = rzalloc(NULL, struct gl_shader);
   if (shader) {
      shader->Type = type;
      shader->Name = name;
      _mesa_init_shader(ctx, shader);
   }
   return shader;
}

struct gl_shader_program *
st_new_shader_program(struct gl_context *ctx, GLuint name)
{
   struct gl_shader_program *shProg;
   shProg = rzalloc(NULL, struct gl_shader_program);
   if (shProg) {
      shProg->Name = name;
      _mesa_init_shader_program(ctx, shProg);
   }
   return shProg;
}
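
/* Presumably these two allocators and st_link_shader() below are what the
 * st/mesa state tracker plugs into the gl_context Driver table (NewShader,
 * NewShaderProgram, LinkShader); that hookup lives outside this file, in the
 * st driver-function setup.
 */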

/**
 * Link a shader.
 * Called via ctx->Driver.LinkShader()
 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
 * with code lowering and other optimizations.
 */
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   assert(prog->LinkStatus);

   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
         continue;

      bool progress;
      exec_list *ir = prog->_LinkedShaders[i]->ir;
      const struct gl_shader_compiler_options *options =
         &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];

      do {
         unsigned what_to_lower = MOD_TO_FRACT | DIV_TO_MUL_RCP |
            EXP_TO_EXP2 | LOG_TO_LOG2;
         if (options->EmitNoPow)
            what_to_lower |= POW_TO_EXP2;
         if (!ctx->Const.NativeIntegers)
            what_to_lower |= INT_DIV_TO_MUL_RCP;

         progress = false;

         /* Lowering */
         do_mat_op_to_vec(ir);
         lower_instructions(ir, what_to_lower);

         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn,
                                   options->EmitNoCont, options->EmitNoLoops)
            || progress;

         progress = do_common_optimization(ir, true, true,
                                           options->MaxUnrollIterations)
            || progress;

         progress = lower_quadop_vector(ir, false) || progress;

         if (options->MaxIfDepth == 0)
            progress = lower_discard(ir) || progress;

         progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;

         if (options->EmitNoNoise)
            progress = lower_noise(ir) || progress;

         /* If there are forms of indirect addressing that the driver
          * cannot handle, perform the lowering pass.
          */
         if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
             || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
            progress =
               lower_variable_index_to_cond_assign(ir,
                                                   options->EmitNoIndirectInput,
                                                   options->EmitNoIndirectOutput,
                                                   options->EmitNoIndirectTemp,
                                                   options->EmitNoIndirectUniform)
               || progress;

         progress = do_vec_index_to_cond_assign(ir) || progress;
      } while (progress);

      validate_ir_tree(ir);
   }
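
   /* At this point the IR is in the lowered form the backend expects.  For
    * example (assuming a driver that sets EmitNoPow), a pow(x, y) in the
    * original shader has already been rewritten by lower_instructions() into
    * exp2(y * log2(x)) before get_mesa_program() below ever sees it.
    */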

   for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
      struct gl_program *linked_prog;

      if (prog->_LinkedShaders[i] == NULL)
         continue;

      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);

      if (linked_prog) {
         static const GLenum targets[] = {
            GL_VERTEX_PROGRAM_ARB,
            GL_FRAGMENT_PROGRAM_ARB,
            GL_GEOMETRY_PROGRAM_NV
         };

         _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                 linked_prog);
         if (!ctx->Driver.ProgramStringNotify(ctx, targets[i], linked_prog)) {
            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                    NULL);
            _mesa_reference_program(ctx, &linked_prog, NULL);
            return GL_FALSE;
         }
      }

      _mesa_reference_program(ctx, &linked_prog, NULL);
   }

   return GL_TRUE;
}

void
st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
                                const GLuint outputMapping[],
                                struct pipe_stream_output_info *so)
{
   unsigned i;
   struct gl_transform_feedback_info *info =
      &glsl_to_tgsi->shader_program->LinkedTransformFeedback;

   for (i = 0; i < info->NumOutputs; i++) {
      so->output[i].register_index =
         outputMapping[info->Outputs[i].OutputRegister];
      so->output[i].start_component = info->Outputs[i].ComponentOffset;
      so->output[i].num_components = info->Outputs[i].NumComponents;
      so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so->output[i].dst_offset = info->Outputs[i].DstOffset;
   }

   for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so->stride[i] = info->BufferStride[i];
   }
   so->num_outputs = info->NumOutputs;
}

} /* extern "C" */