/*
 * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
 * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
 * Copyright 2010 Intel Corporation
 * Copyright 2011 Bryan Cain
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file glsl_to_tgsi.cpp
 *
 * Translate GLSL IR to TGSI.
 */

#include "st_glsl_to_tgsi.h"

#include "compiler/glsl/glsl_parser_extras.h"
#include "compiler/glsl/ir_optimization.h"
#include "compiler/glsl/program.h"

#include "main/errors.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "main/shaderapi.h"
#include "main/shaderimage.h"
#include "program/prog_instruction.h"

#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "tgsi/tgsi_ureg.h"
#include "tgsi/tgsi_info.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "st_glsl_types.h"
#include "st_program.h"
#include "st_mesa_to_tgsi.h"
#include "st_format.h"
#include "st_nir.h"
#include "st_shader_cache.h"
#include "st_glsl_to_tgsi_temprename.h"

#include "util/hash_table.h"
#include <algorithm>

#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

#define MAX_GLSL_TEXTURE_OFFSET 4

static unsigned is_precise(const ir_variable *ir)
{
   if (!ir)
      return 0;
   return ir->data.precise || ir->data.invariant;
}

class variable_storage {
   DECLARE_RZALLOC_CXX_OPERATORS(variable_storage)

public:
   variable_storage(ir_variable *var, gl_register_file file, int index,
                    unsigned array_id = 0)
      : file(file), index(index), component(0), var(var), array_id(array_id)
   {
      assert(file != PROGRAM_ARRAY || array_id != 0);
   }

   gl_register_file file;
   int index;

   /* Explicit component location.  This is given in terms of the GLSL-style
    * swizzles where each double is a single component, i.e. for 64-bit types
    * it can only be 0 or 1.
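    *
    * (Illustration: for a dvec2, the GLSL .y channel is component 1, which
    * occupies the TGSI .zw channel pair of the same register -- see the
    * 64-bit expansion in emit_asm() below.)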
    */
   int component;
   ir_variable *var; /* variable that maps to this, if any */
   unsigned array_id;
};

class immediate_storage : public exec_node {
public:
   immediate_storage(gl_constant_value *values, int size32, int type)
   {
      memcpy(this->values, values, size32 * sizeof(gl_constant_value));
      this->size32 = size32;
      this->type = type;
   }

   /* doubles are stored across 2 gl_constant_values */
   gl_constant_value values[4];
   int size32; /**< Number of 32-bit components (1-4) */
   int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
};

static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
static const st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);

struct inout_decl {
   unsigned mesa_index;
   unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */
   unsigned size;
   unsigned interp_loc;
   unsigned gs_out_streams;
   enum glsl_interp_mode interp;
   enum glsl_base_type base_type;
   ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */
};

static struct inout_decl *
find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id)
{
   assert(array_id != 0);

   for (unsigned i = 0; i < count; i++) {
      struct inout_decl *decl = &decls[i];

      if (array_id == decl->array_id) {
         return decl;
      }
   }

   return NULL;
}

static enum glsl_base_type
find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id)
{
   if (!array_id)
      return GLSL_TYPE_ERROR;
   struct inout_decl *decl = find_inout_array(decls, count, array_id);
   if (decl)
      return decl->base_type;
   return GLSL_TYPE_ERROR;
}

struct hwatomic_decl {
   unsigned location;
   unsigned binding;
   unsigned size;
   unsigned array_id;
};

struct glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_linked_shader *shader;
   struct gl_shader_compiler_options *options;

   int next_temp;

   unsigned *array_sizes;
   unsigned max_num_arrays;
   unsigned next_array;

   struct inout_decl inputs[4 * PIPE_MAX_SHADER_INPUTS];
   unsigned num_inputs;
   unsigned num_input_arrays;
   struct inout_decl outputs[4 * PIPE_MAX_SHADER_OUTPUTS];
   unsigned num_outputs;
   unsigned num_output_arrays;

   struct hwatomic_decl atomic_info[PIPE_MAX_HW_ATOMIC_BUFFERS];
   unsigned num_atomics;
   unsigned num_atomic_arrays;
   int num_address_regs;
   uint32_t samplers_used;
   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
   enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
   int images_used;
   int image_targets[PIPE_MAX_SHADER_IMAGES];
   enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
   bool indirect_addr_consts;
   int wpos_transform_const;

   bool native_integers;
   bool have_sqrt;
   bool have_fma;
   bool use_shared_memory;
   bool has_tex_txf_lz;
   bool precise;
   bool need_uarl;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[8],
                    int size, int datatype, uint16_t *swizzle_out);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir,
                        st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_double(double val);
   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_int64(int64_t val);
   st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method
    * for each concrete subclass of \c ir_instruction.  Virtual base classes
    * within the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE;

   void visit_atomic_counter_intrinsic(ir_call *);
   void visit_ssbo_intrinsic(ir_call *);
   void visit_membar_intrinsic(ir_call *);
   void visit_shared_intrinsic(ir_call *);
   void visit_image_intrinsic(ir_call *);
   void visit_generic_intrinsic(ir_call *, unsigned op);

   st_src_reg result;

   /** List of variable_storage */
   struct hash_table *variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst = undef_dst,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst, st_dst_reg dst1,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   unsigned get_opcode(unsigned op,
                       st_dst_reg dst,
                       st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void get_deref_offsets(ir_dereference *ir,
                          unsigned *array_size,
                          unsigned *base,
                          uint16_t *index,
                          st_src_reg *reladdr,
                          bool opaque);
   void calc_deref_offsets(ir_dereference *tail,
                           unsigned *array_elements,
                           uint16_t *index,
                           st_src_reg *indirect,
                           unsigned *location);
   st_src_reg canonicalize_gather_offset(st_src_reg offset);

   bool
   try_emit_mad(ir_expression *ir,
                int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
                                 int mul_operand);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   void rename_temp_registers(struct rename_reg_pair *renames);
   void get_first_temp_read(int *first_reads);
   void get_first_temp_write(int *first_writes);
   void get_last_temp_read_first_temp_write(int *last_reads,
                                            int *first_writes);
   void get_last_temp_write(int *last_writes);

   void copy_propagate(void);
   int eliminate_dead_code(void);

   void merge_two_dsts(void);
   void merge_registers(void);
   void renumber_registers(void);

   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
                       st_dst_reg *l, st_src_reg *r,
                       st_src_reg *cond, bool cond_swap);

   void *mem_ctx;
};

static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);

static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);

static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args);
   va_end(args);

   prog->data->LinkStatus = linking_failure;
}

int
swizzle_for_size(int size)
{
   static const int size_swizzles[4] = {
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
   };

   assert((size >= 1) && (size <= 4));
   return size_swizzles[size - 1];
}


glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst, st_dst_reg dst1,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
   int num_reladdr = 0, i, j;
   bool dst_is_64bit[2];

   op = get_opcode(op, dst, src0, src1);

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL || dst.reladdr2;
   assert(!dst1.reladdr); /* should be lowered in earlier passes */
   num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
   num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
   num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
   num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;

   reladdr_to_temp(ir, &src3, &num_reladdr);
   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr || dst.reladdr2) {
      if (dst.reladdr)
         emit_arl(ir, address_reg, *dst.reladdr);
      if (dst.reladdr2)
         emit_arl(ir, address_reg2, *dst.reladdr2);
      num_reladdr--;
   }

   assert(num_reladdr == 0);

   /* inst->op has only 8 bits.
    */
   STATIC_ASSERT(TGSI_OPCODE_LAST <= 255);

   inst->op = op;
   inst->precise = this->precise;
   inst->info = tgsi_get_opcode_info(op);
   inst->dst[0] = dst;
   inst->dst[1] = dst1;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->src[3] = src3;
   inst->is_64bit_expanded = false;
   inst->ir = ir;
   inst->dead_mask = 0;
   inst->tex_offsets = NULL;
   inst->tex_offset_num_offset = 0;
   inst->saturate = 0;
   inst->tex_shadow = 0;
   /* default to float, for paths where this is not initialized
    * (since 0==UINT which is likely wrong):
    */
   inst->tex_type = GLSL_TYPE_FLOAT;

   /* Update indirect addressing status used by TGSI */
   if (dst.reladdr || dst.reladdr2) {
      switch (dst.file) {
      case PROGRAM_STATE_VAR:
      case PROGRAM_CONSTANT:
      case PROGRAM_UNIFORM:
         this->indirect_addr_consts = true;
         break;
      case PROGRAM_IMMEDIATE:
         assert(!"immediates should not have indirect addressing");
         break;
      default:
         break;
      }
   }
   else {
      for (i = 0; i < 4; i++) {
         if (inst->src[i].reladdr) {
            switch (inst->src[i].file) {
            case PROGRAM_STATE_VAR:
            case PROGRAM_CONSTANT:
            case PROGRAM_UNIFORM:
               this->indirect_addr_consts = true;
               break;
            case PROGRAM_IMMEDIATE:
               assert(!"immediates should not have indirect addressing");
               break;
            default:
               break;
            }
         }
      }
   }

   /*
    * This section contains the double processing.
    * GLSL just represents doubles as single channel values,
    * however most HW and TGSI represent doubles as pairs of register
    * channels.
    *
    * So we have to fix up destination writemasks/indices and source
    * swizzles/indices.  Destination writemasks need to translate from a
    * single-channel write mask to a dual-channel writemask, but also need to
    * modify the index, if we are touching the Z,W fields in the
    * pre-translated writemask.
    *
    * Source channels have similar index modifications along with swizzle
    * changes so we pick the XY, ZW pairs from the correct index.
    *
    * GLSL [0].x -> TGSI [0].xy
    * GLSL [0].y -> TGSI [0].zw
    * GLSL [0].z -> TGSI [1].xy
    * GLSL [0].w -> TGSI [1].zw
    */
   for (j = 0; j < 2; j++) {
      dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type);
      if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT &&
          inst->dst[j].type == GLSL_TYPE_ARRAY) {
         enum glsl_base_type type = find_array_type(this->outputs,
                                                    this->num_outputs,
                                                    inst->dst[j].array_id);
         if (glsl_base_type_is_64bit(type))
            dst_is_64bit[j] = true;
      }
   }

   if (dst_is_64bit[0] || dst_is_64bit[1] ||
       glsl_base_type_is_64bit(inst->src[0].type)) {
      glsl_to_tgsi_instruction *dinst = NULL;
      int initial_src_swz[4], initial_src_idx[4];
      int initial_dst_idx[2], initial_dst_writemask[2];
      /* select the writemask for dst0 or dst1 */
      unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ?
         inst->dst[0].writemask : inst->dst[1].writemask;

      /* copy out the writemask, index and swizzles for all src/dsts. */
      for (j = 0; j < 2; j++) {
         initial_dst_writemask[j] = inst->dst[j].writemask;
         initial_dst_idx[j] = inst->dst[j].index;
      }

      for (j = 0; j < 4; j++) {
         initial_src_swz[j] = inst->src[j].swizzle;
         initial_src_idx[j] = inst->src[j].index;
      }

      /*
       * scan all the components in the dst writemask
       * generate an instruction for each of them if required.
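       *
       * For example, a DADD writing a dvec3 temp (GLSL writemask .xyz)
       * expands to three instructions here: register N writemask .xy for
       * GLSL .x, register N writemask .zw for GLSL .y, and register N+1
       * writemask .xy for GLSL .z, following the channel mapping above.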
529 */ 530 st_src_reg addr; 531 while (writemask) { 532 533 int i = u_bit_scan(&writemask); 534 535 /* before emitting the instruction, see if we have to adjust load / store 536 * address */ 537 if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || inst->op == TGSI_OPCODE_STORE) && 538 addr.file == PROGRAM_UNDEFINED) { 539 /* We have to advance the buffer address by 16 */ 540 addr = get_temp(glsl_type::uint_type); 541 emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr), 542 inst->src[0], st_src_reg_for_int(16)); 543 } 544 545 /* first time use previous instruction */ 546 if (dinst == NULL) { 547 dinst = inst; 548 } else { 549 /* create a new instructions for subsequent attempts */ 550 dinst = new(mem_ctx) glsl_to_tgsi_instruction(); 551 *dinst = *inst; 552 dinst->next = NULL; 553 dinst->prev = NULL; 554 } 555 this->instructions.push_tail(dinst); 556 dinst->is_64bit_expanded = true; 557 558 /* modify the destination if we are splitting */ 559 for (j = 0; j < 2; j++) { 560 if (dst_is_64bit[j]) { 561 dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY; 562 dinst->dst[j].index = initial_dst_idx[j]; 563 if (i > 1) { 564 if (dinst->op == TGSI_OPCODE_LOAD || dinst->op == TGSI_OPCODE_STORE) 565 dinst->src[0] = addr; 566 if (dinst->op != TGSI_OPCODE_STORE) 567 dinst->dst[j].index++; 568 } 569 } else { 570 /* if we aren't writing to a double, just get the bit of the initial writemask 571 for this channel */ 572 dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i); 573 } 574 } 575 576 /* modify the src registers */ 577 for (j = 0; j < 4; j++) { 578 int swz = GET_SWZ(initial_src_swz[j], i); 579 580 if (glsl_base_type_is_64bit(dinst->src[j].type)) { 581 dinst->src[j].index = initial_src_idx[j]; 582 if (swz > 1) { 583 dinst->src[j].double_reg2 = true; 584 dinst->src[j].index++; 585 } 586 587 if (swz & 1) 588 dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W); 589 else 590 dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); 591 592 } else { 593 /* some opcodes are special case in what they use as sources 594 - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is integer src1 */ 595 if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || op == TGSI_OPCODE_I2D || 596 op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 || 597 op == TGSI_OPCODE_DLDEXP || op == TGSI_OPCODE_LDEXP || 598 (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) { 599 dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz); 600 } 601 } 602 } 603 } 604 inst = dinst; 605 } else { 606 this->instructions.push_tail(inst); 607 } 608 609 610 return inst; 611 } 612 613 glsl_to_tgsi_instruction * 614 glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op, 615 st_dst_reg dst, 616 st_src_reg src0, st_src_reg src1, 617 st_src_reg src2, st_src_reg src3) 618 { 619 return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3); 620 } 621 622 /** 623 * Determines whether to use an integer, unsigned integer, or float opcode 624 * based on the operands and input opcode, then emits the result. 
625 */ 626 unsigned 627 glsl_to_tgsi_visitor::get_opcode(unsigned op, 628 st_dst_reg dst, 629 st_src_reg src0, st_src_reg src1) 630 { 631 enum glsl_base_type type = GLSL_TYPE_FLOAT; 632 633 if (op == TGSI_OPCODE_MOV) 634 return op; 635 636 assert(src0.type != GLSL_TYPE_ARRAY); 637 assert(src0.type != GLSL_TYPE_STRUCT); 638 assert(src1.type != GLSL_TYPE_ARRAY); 639 assert(src1.type != GLSL_TYPE_STRUCT); 640 641 if (is_resource_instruction(op)) 642 type = src1.type; 643 else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64) 644 type = GLSL_TYPE_INT64; 645 else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64) 646 type = GLSL_TYPE_UINT64; 647 else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE) 648 type = GLSL_TYPE_DOUBLE; 649 else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 650 type = GLSL_TYPE_FLOAT; 651 else if (native_integers) 652 type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type; 653 654 #define case7(c, f, i, u, d, i64, ui64) \ 655 case TGSI_OPCODE_##c: \ 656 if (type == GLSL_TYPE_UINT64) \ 657 op = TGSI_OPCODE_##ui64; \ 658 else if (type == GLSL_TYPE_INT64) \ 659 op = TGSI_OPCODE_##i64; \ 660 else if (type == GLSL_TYPE_DOUBLE) \ 661 op = TGSI_OPCODE_##d; \ 662 else if (type == GLSL_TYPE_INT) \ 663 op = TGSI_OPCODE_##i; \ 664 else if (type == GLSL_TYPE_UINT) \ 665 op = TGSI_OPCODE_##u; \ 666 else \ 667 op = TGSI_OPCODE_##f; \ 668 break; 669 670 #define casecomp(c, f, i, u, d, i64, ui64) \ 671 case TGSI_OPCODE_##c: \ 672 if (type == GLSL_TYPE_INT64) \ 673 op = TGSI_OPCODE_##i64; \ 674 else if (type == GLSL_TYPE_UINT64) \ 675 op = TGSI_OPCODE_##ui64; \ 676 else if (type == GLSL_TYPE_DOUBLE) \ 677 op = TGSI_OPCODE_##d; \ 678 else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \ 679 op = TGSI_OPCODE_##i; \ 680 else if (type == GLSL_TYPE_UINT) \ 681 op = TGSI_OPCODE_##u; \ 682 else if (native_integers) \ 683 op = TGSI_OPCODE_##f; \ 684 else \ 685 op = TGSI_OPCODE_##c; \ 686 break; 687 688 switch(op) { 689 /* Some instructions are initially selected without considering the type. 
       * This fixes the type:
       *
       *    INIT     FLOAT SINT     UINT     DOUBLE  SINT64   UINT64
       */
      case7(ADD,     ADD,  UADD,    UADD,    DADD,   U64ADD,  U64ADD);
      case7(CEIL,    CEIL, LAST,    LAST,    DCEIL,  LAST,    LAST);
      case7(DIV,     DIV,  IDIV,    UDIV,    DDIV,   I64DIV,  U64DIV);
      case7(FMA,     FMA,  UMAD,    UMAD,    DFMA,   LAST,    LAST);
      case7(FLR,     FLR,  LAST,    LAST,    DFLR,   LAST,    LAST);
      case7(FRC,     FRC,  LAST,    LAST,    DFRAC,  LAST,    LAST);
      case7(MUL,     MUL,  UMUL,    UMUL,    DMUL,   U64MUL,  U64MUL);
      case7(MAD,     MAD,  UMAD,    UMAD,    DMAD,   LAST,    LAST);
      case7(MAX,     MAX,  IMAX,    UMAX,    DMAX,   I64MAX,  U64MAX);
      case7(MIN,     MIN,  IMIN,    UMIN,    DMIN,   I64MIN,  U64MIN);
      case7(RCP,     RCP,  LAST,    LAST,    DRCP,   LAST,    LAST);
      case7(ROUND,   ROUND,LAST,    LAST,    DROUND, LAST,    LAST);
      case7(RSQ,     RSQ,  LAST,    LAST,    DRSQ,   LAST,    LAST);
      case7(SQRT,    SQRT, LAST,    LAST,    DSQRT,  LAST,    LAST);
      case7(SSG,     SSG,  ISSG,    ISSG,    DSSG,   I64SSG,  I64SSG);
      case7(TRUNC,   TRUNC,LAST,    LAST,    DTRUNC, LAST,    LAST);

      case7(MOD,     LAST, MOD,     UMOD,    LAST,   I64MOD,  U64MOD);
      case7(SHL,     LAST, SHL,     SHL,     LAST,   U64SHL,  U64SHL);
      case7(IBFE,    LAST, IBFE,    UBFE,    LAST,   LAST,    LAST);
      case7(IMSB,    LAST, IMSB,    UMSB,    LAST,   LAST,    LAST);
      case7(IMUL_HI, LAST, IMUL_HI, UMUL_HI, LAST,   LAST,    LAST);
      case7(ISHR,    LAST, ISHR,    USHR,    LAST,   I64SHR,  U64SHR);
      case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST,   LAST,    LAST);
      case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST,   LAST,    LAST);

      casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
      casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
      casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
      casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);

      default: break;
   }

   assert(op != TGSI_OPCODE_LAST);
   return op;
}

glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
                              st_dst_reg dst, st_src_reg src0, st_src_reg src1,
                              unsigned elements)
{
   static const unsigned dot_opcodes[] = {
      TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
   };

   return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1);
}

/**
 * Emits TGSI scalar opcodes to produce unique answers across channels.
 *
 * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
 * channel determines the result across all channels.  So to do a vec4
 * of this operation, we want to emit a scalar per source channel used
 * to produce dest channels.
 */
void
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst,
                                  st_src_reg orig_src0, st_src_reg orig_src1)
{
   int i, j;
   int done_mask = ~dst.writemask;

   /* TGSI RCP is a scalar operation splatting results to all channels,
    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
    * dst channels.
    */
   for (i = 0; i < 4; i++) {
      GLuint this_mask = (1 << i);
      st_src_reg src0 = orig_src0;
      st_src_reg src1 = orig_src1;

      if (done_mask & this_mask)
         continue;

      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
      for (j = i + 1; j < 4; j++) {
         /* If there is another enabled component in the destination that is
          * derived from the same inputs, generate its value on this pass as
          * well.
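          *
          * (For instance, RCP of a source swizzled .xxyy into dst.xyzw
          * collapses to two scalar ops: one for the channels reading src.x
          * and one for the channels reading src.y.)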
778 */ 779 if (!(done_mask & (1 << j)) && 780 GET_SWZ(src0.swizzle, j) == src0_swiz && 781 GET_SWZ(src1.swizzle, j) == src1_swiz) { 782 this_mask |= (1 << j); 783 } 784 } 785 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 786 src0_swiz, src0_swiz); 787 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 788 src1_swiz, src1_swiz); 789 790 dst.writemask = this_mask; 791 emit_asm(ir, op, dst, src0, src1); 792 done_mask |= this_mask; 793 } 794 } 795 796 void 797 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op, 798 st_dst_reg dst, st_src_reg src0) 799 { 800 st_src_reg undef = undef_src; 801 802 undef.swizzle = SWIZZLE_XXXX; 803 804 emit_scalar(ir, op, dst, src0, undef); 805 } 806 807 void 808 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 809 st_dst_reg dst, st_src_reg src0) 810 { 811 int op = TGSI_OPCODE_ARL; 812 813 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) { 814 if (!this->need_uarl && src0.is_legal_tgsi_address_operand()) 815 return; 816 817 op = TGSI_OPCODE_UARL; 818 } 819 820 assert(dst.file == PROGRAM_ADDRESS); 821 if (dst.index >= this->num_address_regs) 822 this->num_address_regs = dst.index + 1; 823 824 emit_asm(NULL, op, dst, src0); 825 } 826 827 int 828 glsl_to_tgsi_visitor::add_constant(gl_register_file file, 829 gl_constant_value values[8], int size, int datatype, 830 uint16_t *swizzle_out) 831 { 832 if (file == PROGRAM_CONSTANT) { 833 GLuint swizzle = swizzle_out ? *swizzle_out : 0; 834 int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, values, 835 size, datatype, &swizzle); 836 if (swizzle_out) 837 *swizzle_out = swizzle; 838 return result; 839 } 840 841 assert(file == PROGRAM_IMMEDIATE); 842 843 int index = 0; 844 immediate_storage *entry; 845 int size32 = size * ((datatype == GL_DOUBLE || 846 datatype == GL_INT64_ARB || 847 datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1); 848 int i; 849 850 /* Search immediate storage to see if we already have an identical 851 * immediate that we can use instead of adding a duplicate entry. 852 */ 853 foreach_in_list(immediate_storage, entry, &this->immediates) { 854 immediate_storage *tmp = entry; 855 856 for (i = 0; i * 4 < size32; i++) { 857 int slot_size = MIN2(size32 - (i * 4), 4); 858 if (tmp->type != datatype || tmp->size32 != slot_size) 859 break; 860 if (memcmp(tmp->values, &values[i * 4], 861 slot_size * sizeof(gl_constant_value))) 862 break; 863 864 /* Everything matches, keep going until the full size is matched */ 865 tmp = (immediate_storage *)tmp->next; 866 } 867 868 /* The full value matched */ 869 if (i * 4 >= size32) 870 return index; 871 872 index++; 873 } 874 875 for (i = 0; i * 4 < size32; i++) { 876 int slot_size = MIN2(size32 - (i * 4), 4); 877 /* Add this immediate to the list. 
       */
      entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size,
                                             datatype);
      this->immediates.push_tail(entry);
      this->num_immediates++;
   }
   return index;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
   union gl_constant_value uval;

   uval.f = val;
   src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);

   return src;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
   union gl_constant_value uval[2];

   memcpy(uval, &val, sizeof(uval));
   src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
   src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
   return src;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
   union gl_constant_value uval;

   assert(native_integers);

   uval.i = val;
   src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);

   return src;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_int64(int64_t val)
{
   st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT64);
   union gl_constant_value uval[2];

   memcpy(uval, &val, sizeof(uval));
   src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
   src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);

   return src;
}

st_src_reg
glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val)
{
   if (native_integers)
      return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
                                       st_src_reg_for_int(val);
   else
      return st_src_reg_for_float(val);
}

static int
attrib_type_size(const struct glsl_type *type, bool is_vs_input)
{
   return type->count_attribute_slots(is_vs_input);
}

static int
type_size(const struct glsl_type *type)
{
   return type->count_attribute_slots(false);
}

static void
add_buffer_to_load_and_stores(glsl_to_tgsi_instruction *inst, st_src_reg *buf,
                              exec_list *instructions, ir_constant *access)
{
   /**
    * emit_asm() might have actually split the op into pieces, e.g. for
    * double stores.  We have to go back and fix up all the generated ops.
    */
   unsigned op = inst->op;
   do {
      inst->resource = *buf;
      if (access)
         inst->buffer_access = access->value.u[0];

      if (inst == instructions->get_head_raw())
         break;
      inst = (glsl_to_tgsi_instruction *)inst->get_prev();

      if (inst->op == TGSI_OPCODE_UADD) {
         if (inst == instructions->get_head_raw())
            break;
         inst = (glsl_to_tgsi_instruction *)inst->get_prev();
      }
   } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
}

/**
 * If the given GLSL type is an array or matrix or a structure containing
 * an array/matrix member, return true.  Else return false.
 *
 * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY
 * or PROGRAM_ARRAY) should be used for variables of this type.  Anytime
 * we have an array that might be indexed with a variable, we need to use
 * the latter storage type.
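 *
 * (E.g. a plain vec4 temporary stays in PROGRAM_TEMPORARY, while a mat4 or
 * a struct containing a float array member is steered to PROGRAM_ARRAY.)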
992 */ 993 static bool 994 type_has_array_or_matrix(const glsl_type *type) 995 { 996 if (type->is_array() || type->is_matrix()) 997 return true; 998 999 if (type->is_record()) { 1000 for (unsigned i = 0; i < type->length; i++) { 1001 if (type_has_array_or_matrix(type->fields.structure[i].type)) { 1002 return true; 1003 } 1004 } 1005 } 1006 1007 return false; 1008 } 1009 1010 1011 /** 1012 * In the initial pass of codegen, we assign temporary numbers to 1013 * intermediate results. (not SSA -- variable assignments will reuse 1014 * storage). 1015 */ 1016 st_src_reg 1017 glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 1018 { 1019 st_src_reg src; 1020 1021 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; 1022 src.reladdr = NULL; 1023 src.negate = 0; 1024 src.abs = 0; 1025 1026 if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) { 1027 if (next_array >= max_num_arrays) { 1028 max_num_arrays += 32; 1029 array_sizes = (unsigned*) 1030 realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays); 1031 } 1032 1033 src.file = PROGRAM_ARRAY; 1034 src.index = 0; 1035 src.array_id = next_array + 1; 1036 array_sizes[next_array] = type_size(type); 1037 ++next_array; 1038 1039 } else { 1040 src.file = PROGRAM_TEMPORARY; 1041 src.index = next_temp; 1042 next_temp += type_size(type); 1043 } 1044 1045 if (type->is_array() || type->is_record()) { 1046 src.swizzle = SWIZZLE_NOOP; 1047 } else { 1048 src.swizzle = swizzle_for_size(type->vector_elements); 1049 } 1050 1051 return src; 1052 } 1053 1054 variable_storage * 1055 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 1056 { 1057 struct hash_entry *entry; 1058 1059 entry = _mesa_hash_table_search(this->variables, var); 1060 if (!entry) 1061 return NULL; 1062 1063 return (variable_storage *)entry->data; 1064 } 1065 1066 void 1067 glsl_to_tgsi_visitor::visit(ir_variable *ir) 1068 { 1069 if (strcmp(ir->name, "gl_FragCoord") == 0) { 1070 this->prog->OriginUpperLeft = ir->data.origin_upper_left; 1071 this->prog->PixelCenterInteger = ir->data.pixel_center_integer; 1072 } 1073 1074 if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 1075 unsigned int i; 1076 const ir_state_slot *const slots = ir->get_state_slots(); 1077 assert(slots != NULL); 1078 1079 /* Check if this statevar's setup in the STATE file exactly 1080 * matches how we'll want to reference it as a 1081 * struct/array/whatever. If not, then we need to move it into 1082 * temporary storage and hope that it'll get copy-propagated 1083 * out. 1084 */ 1085 for (i = 0; i < ir->get_num_state_slots(); i++) { 1086 if (slots[i].swizzle != SWIZZLE_XYZW) { 1087 break; 1088 } 1089 } 1090 1091 variable_storage *storage; 1092 st_dst_reg dst; 1093 if (i == ir->get_num_state_slots()) { 1094 /* We'll set the index later. */ 1095 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 1096 1097 _mesa_hash_table_insert(this->variables, ir, storage); 1098 1099 dst = undef_dst; 1100 } else { 1101 /* The variable_storage constructor allocates slots based on the size 1102 * of the type. However, this had better match the number of state 1103 * elements that we're going to copy into the new temporary. 
1104 */ 1105 assert((int) ir->get_num_state_slots() == type_size(ir->type)); 1106 1107 dst = st_dst_reg(get_temp(ir->type)); 1108 1109 storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index, 1110 dst.array_id); 1111 1112 _mesa_hash_table_insert(this->variables, ir, storage); 1113 } 1114 1115 1116 for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { 1117 int index = _mesa_add_state_reference(this->prog->Parameters, 1118 (gl_state_index *)slots[i].tokens); 1119 1120 if (storage->file == PROGRAM_STATE_VAR) { 1121 if (storage->index == -1) { 1122 storage->index = index; 1123 } else { 1124 assert(index == storage->index + (int)i); 1125 } 1126 } else { 1127 /* We use GLSL_TYPE_FLOAT here regardless of the actual type of 1128 * the data being moved since MOV does not care about the type of 1129 * data it is moving, and we don't want to declare registers with 1130 * array or struct types. 1131 */ 1132 st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT); 1133 src.swizzle = slots[i].swizzle; 1134 emit_asm(ir, TGSI_OPCODE_MOV, dst, src); 1135 /* even a float takes up a whole vec4 reg in a struct/array. */ 1136 dst.index++; 1137 } 1138 } 1139 1140 if (storage->file == PROGRAM_TEMPORARY && 1141 dst.index != storage->index + (int) ir->get_num_state_slots()) { 1142 fail_link(this->shader_program, 1143 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1144 ir->name, dst.index - storage->index, 1145 type_size(ir->type)); 1146 } 1147 } 1148 } 1149 1150 void 1151 glsl_to_tgsi_visitor::visit(ir_loop *ir) 1152 { 1153 emit_asm(NULL, TGSI_OPCODE_BGNLOOP); 1154 1155 visit_exec_list(&ir->body_instructions, this); 1156 1157 emit_asm(NULL, TGSI_OPCODE_ENDLOOP); 1158 } 1159 1160 void 1161 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1162 { 1163 switch (ir->mode) { 1164 case ir_loop_jump::jump_break: 1165 emit_asm(NULL, TGSI_OPCODE_BRK); 1166 break; 1167 case ir_loop_jump::jump_continue: 1168 emit_asm(NULL, TGSI_OPCODE_CONT); 1169 break; 1170 } 1171 } 1172 1173 1174 void 1175 glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1176 { 1177 assert(0); 1178 (void)ir; 1179 } 1180 1181 void 1182 glsl_to_tgsi_visitor::visit(ir_function *ir) 1183 { 1184 /* Ignore function bodies other than main() -- we shouldn't see calls to 1185 * them since they should all be inlined before we get to glsl_to_tgsi. 
1186 */ 1187 if (strcmp(ir->name, "main") == 0) { 1188 const ir_function_signature *sig; 1189 exec_list empty; 1190 1191 sig = ir->matching_signature(NULL, &empty, false); 1192 1193 assert(sig); 1194 1195 foreach_in_list(ir_instruction, ir, &sig->body) { 1196 ir->accept(this); 1197 } 1198 } 1199 } 1200 1201 bool 1202 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1203 { 1204 int nonmul_operand = 1 - mul_operand; 1205 st_src_reg a, b, c; 1206 st_dst_reg result_dst; 1207 1208 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1209 if (!expr || expr->operation != ir_binop_mul) 1210 return false; 1211 1212 expr->operands[0]->accept(this); 1213 a = this->result; 1214 expr->operands[1]->accept(this); 1215 b = this->result; 1216 ir->operands[nonmul_operand]->accept(this); 1217 c = this->result; 1218 1219 this->result = get_temp(ir->type); 1220 result_dst = st_dst_reg(this->result); 1221 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1222 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1223 1224 return true; 1225 } 1226 1227 /** 1228 * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) 1229 * 1230 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 1231 * implemented using multiplication, and logical-or is implemented using 1232 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 1233 * As result, the logical expression (a & !b) can be rewritten as: 1234 * 1235 * - a * !b 1236 * - a * (1 - b) 1237 * - (a * 1) - (a * b) 1238 * - a + -(a * b) 1239 * - a + (a * -b) 1240 * 1241 * This final expression can be implemented as a single MAD(a, -b, a) 1242 * instruction. 1243 */ 1244 bool 1245 glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 1246 { 1247 const int other_operand = 1 - try_operand; 1248 st_src_reg a, b; 1249 1250 ir_expression *expr = ir->operands[try_operand]->as_expression(); 1251 if (!expr || expr->operation != ir_unop_logic_not) 1252 return false; 1253 1254 ir->operands[other_operand]->accept(this); 1255 a = this->result; 1256 expr->operands[0]->accept(this); 1257 b = this->result; 1258 1259 b.negate = ~b.negate; 1260 1261 this->result = get_temp(ir->type); 1262 emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); 1263 1264 return true; 1265 } 1266 1267 void 1268 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1269 st_src_reg *reg, int *num_reladdr) 1270 { 1271 if (!reg->reladdr && !reg->reladdr2) 1272 return; 1273 1274 if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr); 1275 if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2); 1276 1277 if (*num_reladdr != 1) { 1278 st_src_reg temp = get_temp(glsl_type::get_instance(reg->type, 4, 1)); 1279 1280 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1281 *reg = temp; 1282 } 1283 1284 (*num_reladdr)--; 1285 } 1286 1287 void 1288 glsl_to_tgsi_visitor::visit(ir_expression *ir) 1289 { 1290 st_src_reg op[ARRAY_SIZE(ir->operands)]; 1291 1292 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1293 */ 1294 if (!this->precise && ir->operation == ir_binop_add) { 1295 if (try_emit_mad(ir, 1)) 1296 return; 1297 if (try_emit_mad(ir, 0)) 1298 return; 1299 } 1300 1301 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1302 */ 1303 if (!native_integers && ir->operation == ir_binop_logic_and) { 1304 if (try_emit_mad_for_and_not(ir, 1)) 1305 return; 1306 if (try_emit_mad_for_and_not(ir, 0)) 1307 return; 1308 } 1309 1310 if (ir->operation == ir_quadop_vector) 
      assert(!"ir_quadop_vector should have been lowered");

   for (unsigned int operand = 0; operand < ir->num_operands; operand++) {
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      if (this->result.file == PROGRAM_UNDEFINED) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         printf("\n");
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   visit_expression(ir, op);
}

/* The non-recursive part of the expression visitor lives in a separate
 * function and should be prevented from being inlined, to avoid a stack
 * explosion when deeply nested expressions are visited.
 */
void
glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
{
   st_src_reg result_src;
   st_dst_reg result_dst;

   int vector_elements = ir->operands[0]->type->vector_elements;
   if (ir->operands[1] &&
       ir->operation != ir_binop_interpolate_at_offset &&
       ir->operation != ir_binop_interpolate_at_sample) {
      st_src_reg *swz_op = NULL;
      if (vector_elements > ir->operands[1]->type->vector_elements) {
         assert(ir->operands[1]->type->vector_elements == 1);
         swz_op = &op[1];
      } else if (vector_elements < ir->operands[1]->type->vector_elements) {
         assert(ir->operands[0]->type->vector_elements == 1);
         swz_op = &op[0];
      }
      if (swz_op) {
         uint16_t swizzle_x = GET_SWZ(swz_op->swizzle, 0);
         swz_op->swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
                                         swizzle_x, swizzle_x);
      }
      vector_elements = MAX2(vector_elements,
                             ir->operands[1]->type->vector_elements);
   }
   if (ir->operands[2] &&
       ir->operands[2]->type->vector_elements != vector_elements) {
      /* This can happen with ir_triop_lrp, i.e. glsl mix */
      assert(ir->operands[2]->type->vector_elements == 1);
      uint16_t swizzle_x = GET_SWZ(op[2].swizzle, 0);
      op[2].swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x,
                                    swizzle_x, swizzle_x);
   }

   this->result.file = PROGRAM_UNDEFINED;

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   /* convenience for the emit functions below. */
   result_dst = st_dst_reg(result_src);
   /* Limit writes to the channels that will be used by result_src later.
    * This does limit this temp's use as a temporary for multi-instruction
    * sequences.
    */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   switch (ir->operation) {
   case ir_unop_logic_not:
      if (result_dst.type != GLSL_TYPE_FLOAT)
         emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
      else {
         /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
          * older GPUs implement SEQ using multiple instructions (i915 uses
          * two SGE instructions and a MUL instruction).  Since our logic
          * values are 0.0 and 1.0, 1-x also implements !x.
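          *
          * (With x restricted to {0.0, 1.0}: 1.0 - 0.0 = 1.0 and
          * 1.0 - 1.0 = 0.0, exactly the logical NOT.)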
1395 */ 1396 op[0].negate = ~op[0].negate; 1397 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0)); 1398 } 1399 break; 1400 case ir_unop_neg: 1401 if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64) 1402 emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]); 1403 else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT) 1404 emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1405 else if (result_dst.type == GLSL_TYPE_DOUBLE) 1406 emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); 1407 else { 1408 op[0].negate = ~op[0].negate; 1409 result_src = op[0]; 1410 } 1411 break; 1412 case ir_unop_subroutine_to_int: 1413 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 1414 break; 1415 case ir_unop_abs: 1416 if (result_dst.type == GLSL_TYPE_FLOAT) 1417 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs()); 1418 else if (result_dst.type == GLSL_TYPE_DOUBLE) 1419 emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]); 1420 else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64) 1421 emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]); 1422 else 1423 emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]); 1424 break; 1425 case ir_unop_sign: 1426 emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1427 break; 1428 case ir_unop_rcp: 1429 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1430 break; 1431 1432 case ir_unop_exp2: 1433 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1434 break; 1435 case ir_unop_exp: 1436 assert(!"not reached: should be handled by exp_to_exp2"); 1437 break; 1438 case ir_unop_log: 1439 assert(!"not reached: should be handled by log_to_log2"); 1440 break; 1441 case ir_unop_log2: 1442 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1443 break; 1444 case ir_unop_sin: 1445 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1446 break; 1447 case ir_unop_cos: 1448 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1449 break; 1450 case ir_unop_saturate: { 1451 glsl_to_tgsi_instruction *inst; 1452 inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 1453 inst->saturate = true; 1454 break; 1455 } 1456 1457 case ir_unop_dFdx: 1458 case ir_unop_dFdx_coarse: 1459 emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1460 break; 1461 case ir_unop_dFdx_fine: 1462 emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]); 1463 break; 1464 case ir_unop_dFdy: 1465 case ir_unop_dFdy_coarse: 1466 case ir_unop_dFdy_fine: 1467 { 1468 /* The X component contains 1 or -1 depending on whether the framebuffer 1469 * is a FBO or the window system buffer, respectively. 1470 * It is then multiplied with the source operand of DDY. 1471 */ 1472 static const gl_state_index transform_y_state[STATE_LENGTH] 1473 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; 1474 1475 unsigned transform_y_index = 1476 _mesa_add_state_reference(this->prog->Parameters, 1477 transform_y_state); 1478 1479 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR, 1480 transform_y_index, 1481 glsl_type::vec4_type); 1482 transform_y.swizzle = SWIZZLE_XXXX; 1483 1484 st_src_reg temp = get_temp(glsl_type::vec4_type); 1485 1486 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]); 1487 emit_asm(ir, ir->operation == ir_unop_dFdy_fine ? 
1488 TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp); 1489 break; 1490 } 1491 1492 case ir_unop_frexp_sig: 1493 emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]); 1494 break; 1495 1496 case ir_unop_frexp_exp: 1497 emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]); 1498 break; 1499 1500 case ir_unop_noise: { 1501 /* At some point, a motivated person could add a better 1502 * implementation of noise. Currently not even the nvidia 1503 * binary drivers do anything more than this. In any case, the 1504 * place to do this is in the GL state tracker, not the poor 1505 * driver. 1506 */ 1507 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5)); 1508 break; 1509 } 1510 1511 case ir_binop_add: 1512 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1513 break; 1514 case ir_binop_sub: 1515 op[1].negate = ~op[1].negate; 1516 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1517 break; 1518 1519 case ir_binop_mul: 1520 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1521 break; 1522 case ir_binop_div: 1523 emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1524 break; 1525 case ir_binop_mod: 1526 if (result_dst.type == GLSL_TYPE_FLOAT) 1527 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1528 else 1529 emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1530 break; 1531 1532 case ir_binop_less: 1533 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1534 break; 1535 case ir_binop_gequal: 1536 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1537 break; 1538 case ir_binop_equal: 1539 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1540 break; 1541 case ir_binop_nequal: 1542 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1543 break; 1544 case ir_binop_all_equal: 1545 /* "==" operator producing a scalar boolean. */ 1546 if (ir->operands[0]->type->is_vector() || 1547 ir->operands[1]->type->is_vector()) { 1548 st_src_reg temp = get_temp(native_integers ? 1549 glsl_type::uvec4_type : 1550 glsl_type::vec4_type); 1551 1552 if (native_integers) { 1553 st_dst_reg temp_dst = st_dst_reg(temp); 1554 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1555 1556 if (ir->operands[0]->type->is_boolean() && 1557 ir->operands[1]->as_constant() && 1558 ir->operands[1]->as_constant()->is_one()) { 1559 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]); 1560 } else { 1561 emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); 1562 } 1563 1564 /* Emit 1-3 AND operations to combine the SEQ results. */ 1565 switch (ir->operands[0]->type->vector_elements) { 1566 case 2: 1567 break; 1568 case 3: 1569 temp_dst.writemask = WRITEMASK_Y; 1570 temp1.swizzle = SWIZZLE_YYYY; 1571 temp2.swizzle = SWIZZLE_ZZZZ; 1572 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1573 break; 1574 case 4: 1575 temp_dst.writemask = WRITEMASK_X; 1576 temp1.swizzle = SWIZZLE_XXXX; 1577 temp2.swizzle = SWIZZLE_YYYY; 1578 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1579 temp_dst.writemask = WRITEMASK_Y; 1580 temp1.swizzle = SWIZZLE_ZZZZ; 1581 temp2.swizzle = SWIZZLE_WWWW; 1582 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1583 } 1584 1585 temp1.swizzle = SWIZZLE_XXXX; 1586 temp2.swizzle = SWIZZLE_YYYY; 1587 emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); 1588 } else { 1589 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1590 1591 /* After the dot-product, the value will be an integer on the 1592 * range [0,4]. 
Zero becomes 1.0, and positive values become zero. 1593 */ 1594 emit_dp(ir, result_dst, temp, temp, vector_elements); 1595 1596 /* Negating the result of the dot-product gives values on the range 1597 * [-4, 0]. Zero becomes 1.0, and negative values become zero. 1598 * This is achieved using SGE. 1599 */ 1600 st_src_reg sge_src = result_src; 1601 sge_src.negate = ~sge_src.negate; 1602 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0)); 1603 } 1604 } else { 1605 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1606 } 1607 break; 1608 case ir_binop_any_nequal: 1609 /* "!=" operator producing a scalar boolean. */ 1610 if (ir->operands[0]->type->is_vector() || 1611 ir->operands[1]->type->is_vector()) { 1612 st_src_reg temp = get_temp(native_integers ? 1613 glsl_type::uvec4_type : 1614 glsl_type::vec4_type); 1615 if (ir->operands[0]->type->is_boolean() && 1616 ir->operands[1]->as_constant() && 1617 ir->operands[1]->as_constant()->is_zero()) { 1618 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]); 1619 } else { 1620 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1621 } 1622 1623 if (native_integers) { 1624 st_dst_reg temp_dst = st_dst_reg(temp); 1625 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1626 1627 /* Emit 1-3 OR operations to combine the SNE results. */ 1628 switch (ir->operands[0]->type->vector_elements) { 1629 case 2: 1630 break; 1631 case 3: 1632 temp_dst.writemask = WRITEMASK_Y; 1633 temp1.swizzle = SWIZZLE_YYYY; 1634 temp2.swizzle = SWIZZLE_ZZZZ; 1635 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1636 break; 1637 case 4: 1638 temp_dst.writemask = WRITEMASK_X; 1639 temp1.swizzle = SWIZZLE_XXXX; 1640 temp2.swizzle = SWIZZLE_YYYY; 1641 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1642 temp_dst.writemask = WRITEMASK_Y; 1643 temp1.swizzle = SWIZZLE_ZZZZ; 1644 temp2.swizzle = SWIZZLE_WWWW; 1645 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1646 } 1647 1648 temp1.swizzle = SWIZZLE_XXXX; 1649 temp2.swizzle = SWIZZLE_YYYY; 1650 emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); 1651 } else { 1652 /* After the dot-product, the value will be an integer on the 1653 * range [0,4]. Zero stays zero, and positive values become 1.0. 1654 */ 1655 glsl_to_tgsi_instruction *const dp = 1656 emit_dp(ir, result_dst, temp, temp, vector_elements); 1657 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1658 /* The clamping to [0,1] can be done for free in the fragment 1659 * shader with a saturate. 1660 */ 1661 dp->saturate = true; 1662 } else { 1663 /* Negating the result of the dot-product gives values on the range 1664 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1665 * achieved using SLT. 1666 */ 1667 st_src_reg slt_src = result_src; 1668 slt_src.negate = ~slt_src.negate; 1669 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1670 } 1671 } 1672 } else { 1673 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1674 } 1675 break; 1676 1677 case ir_binop_logic_xor: 1678 if (native_integers) 1679 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1680 else 1681 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1682 break; 1683 1684 case ir_binop_logic_or: { 1685 if (native_integers) { 1686 /* If integers are used as booleans, we can use an actual "or" 1687 * instruction. 
1688 */ 1689 assert(native_integers); 1690 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1691 } else { 1692 /* After the addition, the value will be an integer on the 1693 * range [0,2]. Zero stays zero, and positive values become 1.0. 1694 */ 1695 glsl_to_tgsi_instruction *add = 1696 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1697 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1698 /* The clamping to [0,1] can be done for free in the fragment 1699 * shader with a saturate if floats are being used as boolean values. 1700 */ 1701 add->saturate = true; 1702 } else { 1703 /* Negating the result of the addition gives values on the range 1704 * [-2, 0]. Zero stays zero, and negative values become 1.0. This 1705 * is achieved using SLT. 1706 */ 1707 st_src_reg slt_src = result_src; 1708 slt_src.negate = ~slt_src.negate; 1709 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0)); 1710 } 1711 } 1712 break; 1713 } 1714 1715 case ir_binop_logic_and: 1716 /* If native integers are disabled, the bool args are stored as float 0.0 1717 * or 1.0, so "mul" gives us "and". If they're enabled, just use the 1718 * actual AND opcode. 1719 */ 1720 if (native_integers) 1721 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1722 else 1723 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1724 break; 1725 1726 case ir_binop_dot: 1727 assert(ir->operands[0]->type->is_vector()); 1728 assert(ir->operands[0]->type == ir->operands[1]->type); 1729 emit_dp(ir, result_dst, op[0], op[1], 1730 ir->operands[0]->type->vector_elements); 1731 break; 1732 1733 case ir_unop_sqrt: 1734 if (have_sqrt) { 1735 emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]); 1736 } else { 1737 /* This is the only instruction sequence that makes the game "Risen" 1738 * render correctly. ABS is not required for the game, but since GLSL 1739 * declares negative values as "undefined", allowing us to do whatever 1740 * we want, I choose to use ABS to match DX9 and pre-GLSL RSQ 1741 * behavior. 1742 */ 1743 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs()); 1744 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src); 1745 } 1746 break; 1747 case ir_unop_rsq: 1748 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1749 break; 1750 case ir_unop_i2f: 1751 if (native_integers) { 1752 emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1753 break; 1754 } 1755 /* fallthrough to next case otherwise */ 1756 case ir_unop_b2f: 1757 if (native_integers) { 1758 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0)); 1759 break; 1760 } 1761 /* fallthrough to next case otherwise */ 1762 case ir_unop_i2u: 1763 case ir_unop_u2i: 1764 case ir_unop_i642u64: 1765 case ir_unop_u642i64: 1766 /* Converting between signed and unsigned integers is a no-op. */ 1767 result_src = op[0]; 1768 result_src.type = result_dst.type; 1769 break; 1770 case ir_unop_b2i: 1771 if (native_integers) { 1772 /* Booleans are stored as integers using ~0 for true and 0 for false. 1773 * GLSL requires that int(bool) return 1 for true and 0 for false. 1774 * This conversion is done with AND, but it could be done with NEG. 1775 */ 1776 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1)); 1777 } else { 1778 /* Booleans and integers are both stored as floats when native 1779 * integers are disabled. 
1780 */ 1781 result_src = op[0]; 1782 } 1783 break; 1784 case ir_unop_f2i: 1785 if (native_integers) 1786 emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1787 else 1788 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1789 break; 1790 case ir_unop_f2u: 1791 if (native_integers) 1792 emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]); 1793 else 1794 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1795 break; 1796 case ir_unop_bitcast_f2i: 1797 case ir_unop_bitcast_f2u: 1798 /* Make sure we don't propagate the negate modifier to integer opcodes. */ 1799 if (op[0].negate || op[0].abs) 1800 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 1801 else 1802 result_src = op[0]; 1803 result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT : 1804 GLSL_TYPE_UINT; 1805 break; 1806 case ir_unop_bitcast_i2f: 1807 case ir_unop_bitcast_u2f: 1808 result_src = op[0]; 1809 result_src.type = GLSL_TYPE_FLOAT; 1810 break; 1811 case ir_unop_f2b: 1812 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1813 break; 1814 case ir_unop_d2b: 1815 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0)); 1816 break; 1817 case ir_unop_i2b: 1818 if (native_integers) 1819 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0)); 1820 else 1821 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0)); 1822 break; 1823 case ir_unop_bitcast_u642d: 1824 case ir_unop_bitcast_i642d: 1825 result_src = op[0]; 1826 result_src.type = GLSL_TYPE_DOUBLE; 1827 break; 1828 case ir_unop_bitcast_d2i64: 1829 result_src = op[0]; 1830 result_src.type = GLSL_TYPE_INT64; 1831 break; 1832 case ir_unop_bitcast_d2u64: 1833 result_src = op[0]; 1834 result_src.type = GLSL_TYPE_UINT64; 1835 break; 1836 case ir_unop_trunc: 1837 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1838 break; 1839 case ir_unop_ceil: 1840 emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); 1841 break; 1842 case ir_unop_floor: 1843 emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1844 break; 1845 case ir_unop_round_even: 1846 emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); 1847 break; 1848 case ir_unop_fract: 1849 emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 1850 break; 1851 1852 case ir_binop_min: 1853 emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 1854 break; 1855 case ir_binop_max: 1856 emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 1857 break; 1858 case ir_binop_pow: 1859 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 1860 break; 1861 1862 case ir_unop_bit_not: 1863 if (native_integers) { 1864 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1865 break; 1866 } 1867 case ir_unop_u2f: 1868 if (native_integers) { 1869 emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 1870 break; 1871 } 1872 case ir_binop_lshift: 1873 case ir_binop_rshift: 1874 if (native_integers) { 1875 unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL 1876 : TGSI_OPCODE_ISHR; 1877 st_src_reg count; 1878 1879 if (glsl_base_type_is_64bit(op[0].type)) { 1880 /* GLSL shift operations have 32-bit shift counts, but TGSI uses 1881 * 64 bits. 
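          * for 64-bit shifts, so the count is first widened with U2I64 into
          * a matching 64-bit temporary.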
1882 */ 1883 count = get_temp(glsl_type::u64vec(ir->operands[1]->type->components())); 1884 emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]); 1885 } else { 1886 count = op[1]; 1887 } 1888 1889 emit_asm(ir, opcode, result_dst, op[0], count); 1890 break; 1891 } 1892 case ir_binop_bit_and: 1893 if (native_integers) { 1894 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1895 break; 1896 } 1897 case ir_binop_bit_xor: 1898 if (native_integers) { 1899 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1900 break; 1901 } 1902 case ir_binop_bit_or: 1903 if (native_integers) { 1904 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1905 break; 1906 } 1907 1908 assert(!"GLSL 1.30 features unsupported"); 1909 break; 1910 1911 case ir_binop_ubo_load: { 1912 if (ctx->Const.UseSTD430AsDefaultPacking) { 1913 ir_rvalue *block = ir->operands[0]; 1914 ir_rvalue *offset = ir->operands[1]; 1915 ir_constant *const_block = block->as_constant(); 1916 1917 st_src_reg cbuf(PROGRAM_CONSTANT, 1918 (const_block ? const_block->value.u[0] + 1 : 1), 1919 ir->type->base_type); 1920 1921 cbuf.has_index2 = true; 1922 1923 if (!const_block) { 1924 block->accept(this); 1925 cbuf.reladdr = ralloc(mem_ctx, st_src_reg); 1926 *cbuf.reladdr = this->result; 1927 emit_arl(ir, sampler_reladdr, this->result); 1928 } 1929 1930 /* Calculate the surface offset */ 1931 offset->accept(this); 1932 st_src_reg off = this->result; 1933 1934 glsl_to_tgsi_instruction *inst = 1935 emit_asm(ir, TGSI_OPCODE_LOAD, result_dst, off); 1936 1937 if (result_dst.type == GLSL_TYPE_BOOL) 1938 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, st_src_reg(result_dst), 1939 st_src_reg_for_int(0)); 1940 1941 add_buffer_to_load_and_stores(inst, &cbuf, &this->instructions, 1942 NULL); 1943 } else { 1944 ir_constant *const_uniform_block = ir->operands[0]->as_constant(); 1945 ir_constant *const_offset_ir = ir->operands[1]->as_constant(); 1946 unsigned const_offset = const_offset_ir ? 1947 const_offset_ir->value.u[0] : 0; 1948 unsigned const_block = const_uniform_block ? 1949 const_uniform_block->value.u[0] + 1 : 1; 1950 st_src_reg index_reg = get_temp(glsl_type::uint_type); 1951 st_src_reg cbuf; 1952 1953 cbuf.type = ir->type->base_type; 1954 cbuf.file = PROGRAM_CONSTANT; 1955 cbuf.index = 0; 1956 cbuf.reladdr = NULL; 1957 cbuf.negate = 0; 1958 cbuf.abs = 0; 1959 cbuf.index2D = const_block; 1960 1961 assert(ir->type->is_vector() || ir->type->is_scalar()); 1962 1963 if (const_offset_ir) { 1964 /* Constant index into constant buffer */ 1965 cbuf.reladdr = NULL; 1966 cbuf.index = const_offset / 16; 1967 } else { 1968 ir_expression *offset_expr = ir->operands[1]->as_expression(); 1969 st_src_reg offset = op[1]; 1970 1971 /* The OpenGL spec is written in such a way that accesses with 1972 * non-constant offset are almost always vec4-aligned. The only 1973 * exception to this are members of structs in arrays of structs: 1974 * each struct in an array of structs is at least vec4-aligned, 1975 * but single-element and [ui]vec2 members of the struct may be at 1976 * an offset that is not a multiple of 16 bytes. 1977 * 1978 * Here, we extract that offset, relying on previous passes to 1979 * always generate offset expressions of the form 1980 * (+ expr constant_offset). 1981 * 1982 * Note that the std430 layout, which allows more cases of 1983 * alignment less than vec4 in arrays, is not supported for 1984 * uniform blocks, so we do not have to deal with it here. 
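          * (When UseSTD430AsDefaultPacking is set, the std430 branch above
          * handles the load with TGSI_OPCODE_LOAD instead and never reaches
          * this code.)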
1985 */ 1986 if (offset_expr && offset_expr->operation == ir_binop_add) { 1987 const_offset_ir = offset_expr->operands[1]->as_constant(); 1988 if (const_offset_ir) { 1989 const_offset = const_offset_ir->value.u[0]; 1990 cbuf.index = const_offset / 16; 1991 offset_expr->operands[0]->accept(this); 1992 offset = this->result; 1993 } 1994 } 1995 1996 /* Relative/variable index into constant buffer */ 1997 emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset, 1998 st_src_reg_for_int(4)); 1999 cbuf.reladdr = ralloc(mem_ctx, st_src_reg); 2000 memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg)); 2001 } 2002 2003 if (const_uniform_block) { 2004 /* Constant constant buffer */ 2005 cbuf.reladdr2 = NULL; 2006 } else { 2007 /* Relative/variable constant buffer */ 2008 cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg); 2009 memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg)); 2010 } 2011 cbuf.has_index2 = true; 2012 2013 cbuf.swizzle = swizzle_for_size(ir->type->vector_elements); 2014 if (glsl_base_type_is_64bit(cbuf.type)) 2015 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8, 2016 const_offset % 16 / 8, 2017 const_offset % 16 / 8, 2018 const_offset % 16 / 8); 2019 else 2020 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4, 2021 const_offset % 16 / 4, 2022 const_offset % 16 / 4, 2023 const_offset % 16 / 4); 2024 2025 if (ir->type->is_boolean()) { 2026 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, 2027 st_src_reg_for_int(0)); 2028 } else { 2029 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf); 2030 } 2031 } 2032 break; 2033 } 2034 case ir_triop_lrp: 2035 /* note: we have to reorder the three args here */ 2036 emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]); 2037 break; 2038 case ir_triop_csel: 2039 if (this->ctx->Const.NativeIntegers) 2040 emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]); 2041 else { 2042 op[0].negate = ~op[0].negate; 2043 emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]); 2044 } 2045 break; 2046 case ir_triop_bitfield_extract: 2047 emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]); 2048 break; 2049 case ir_quadop_bitfield_insert: 2050 emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]); 2051 break; 2052 case ir_unop_bitfield_reverse: 2053 emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]); 2054 break; 2055 case ir_unop_bit_count: 2056 emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]); 2057 break; 2058 case ir_unop_find_msb: 2059 emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]); 2060 break; 2061 case ir_unop_find_lsb: 2062 emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]); 2063 break; 2064 case ir_binop_imul_high: 2065 emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); 2066 break; 2067 case ir_triop_fma: 2068 /* In theory, MAD is incorrect here. 
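       * fma() is meant to be a single operation without intermediate
       * rounding, which a plain MAD may not guarantee, so MAD is only used
       * as a fallback when the driver exposes no FMA opcode.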
*/ 2069 if (have_fma) 2070 emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); 2071 else 2072 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); 2073 break; 2074 case ir_unop_interpolate_at_centroid: 2075 emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); 2076 break; 2077 case ir_binop_interpolate_at_offset: { 2078 /* The y coordinate needs to be flipped for the default fb */ 2079 static const gl_state_index transform_y_state[STATE_LENGTH] 2080 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM }; 2081 2082 unsigned transform_y_index = 2083 _mesa_add_state_reference(this->prog->Parameters, 2084 transform_y_state); 2085 2086 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR, 2087 transform_y_index, 2088 glsl_type::vec4_type); 2089 transform_y.swizzle = SWIZZLE_XXXX; 2090 2091 st_src_reg temp = get_temp(glsl_type::vec2_type); 2092 st_dst_reg temp_dst = st_dst_reg(temp); 2093 2094 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[1]); 2095 temp_dst.writemask = WRITEMASK_Y; 2096 emit_asm(ir, TGSI_OPCODE_MUL, temp_dst, transform_y, op[1]); 2097 emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], temp); 2098 break; 2099 } 2100 case ir_binop_interpolate_at_sample: 2101 emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); 2102 break; 2103 2104 case ir_unop_d2f: 2105 emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]); 2106 break; 2107 case ir_unop_f2d: 2108 emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]); 2109 break; 2110 case ir_unop_d2i: 2111 emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]); 2112 break; 2113 case ir_unop_i2d: 2114 emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]); 2115 break; 2116 case ir_unop_d2u: 2117 emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]); 2118 break; 2119 case ir_unop_u2d: 2120 emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]); 2121 break; 2122 case ir_unop_unpack_double_2x32: 2123 case ir_unop_pack_double_2x32: 2124 case ir_unop_unpack_int_2x32: 2125 case ir_unop_pack_int_2x32: 2126 case ir_unop_unpack_uint_2x32: 2127 case ir_unop_pack_uint_2x32: 2128 case ir_unop_unpack_sampler_2x32: 2129 case ir_unop_pack_sampler_2x32: 2130 case ir_unop_unpack_image_2x32: 2131 case ir_unop_pack_image_2x32: 2132 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 2133 break; 2134 2135 case ir_binop_ldexp: 2136 if (ir->operands[0]->type->is_double()) { 2137 emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]); 2138 } else if (ir->operands[0]->type->is_float()) { 2139 emit_asm(ir, TGSI_OPCODE_LDEXP, result_dst, op[0], op[1]); 2140 } else { 2141 assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()"); 2142 } 2143 break; 2144 2145 case ir_unop_pack_half_2x16: 2146 emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]); 2147 break; 2148 case ir_unop_unpack_half_2x16: 2149 emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]); 2150 break; 2151 2152 case ir_unop_get_buffer_size: { 2153 ir_constant *const_offset = ir->operands[0]->as_constant(); 2154 int buf_base = ctx->st->has_hw_atomics ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers; 2155 st_src_reg buffer( 2156 PROGRAM_BUFFER, 2157 buf_base + (const_offset ? 
const_offset->value.u[0] : 0), 2158 GLSL_TYPE_UINT); 2159 if (!const_offset) { 2160 buffer.reladdr = ralloc(mem_ctx, st_src_reg); 2161 *buffer.reladdr = op[0]; 2162 emit_arl(ir, sampler_reladdr, op[0]); 2163 } 2164 emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->resource = buffer; 2165 break; 2166 } 2167 2168 case ir_unop_u2i64: 2169 case ir_unop_u2u64: 2170 case ir_unop_b2i64: { 2171 st_src_reg temp = get_temp(glsl_type::uvec4_type); 2172 st_dst_reg temp_dst = st_dst_reg(temp); 2173 unsigned orig_swz = op[0].swizzle; 2174 /* 2175 * To convert unsigned to 64-bit: 2176 * zero Y channel, copy X channel. 2177 */ 2178 temp_dst.writemask = WRITEMASK_Y; 2179 if (vector_elements > 1) 2180 temp_dst.writemask |= WRITEMASK_W; 2181 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); 2182 temp_dst.writemask = WRITEMASK_X; 2183 if (vector_elements > 1) 2184 temp_dst.writemask |= WRITEMASK_Z; 2185 op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0), 2186 GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1)); 2187 if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) 2188 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); 2189 else 2190 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1)); 2191 result_src = temp; 2192 result_src.type = GLSL_TYPE_UINT64; 2193 if (vector_elements > 2) { 2194 /* Subtle: We rely on the fact that get_temp here returns the next 2195 * TGSI temporary register directly after the temp register used for 2196 * the first two components, so that the result gets picked up 2197 * automatically. 2198 */ 2199 st_src_reg temp = get_temp(glsl_type::uvec4_type); 2200 st_dst_reg temp_dst = st_dst_reg(temp); 2201 temp_dst.writemask = WRITEMASK_Y; 2202 if (vector_elements > 3) 2203 temp_dst.writemask |= WRITEMASK_W; 2204 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); 2205 2206 temp_dst.writemask = WRITEMASK_X; 2207 if (vector_elements > 3) 2208 temp_dst.writemask |= WRITEMASK_Z; 2209 op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2), 2210 GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3)); 2211 if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) 2212 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); 2213 else 2214 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1)); 2215 } 2216 break; 2217 } 2218 case ir_unop_i642i: 2219 case ir_unop_u642i: 2220 case ir_unop_u642u: 2221 case ir_unop_i642u: { 2222 st_src_reg temp = get_temp(glsl_type::uvec4_type); 2223 st_dst_reg temp_dst = st_dst_reg(temp); 2224 unsigned orig_swz = op[0].swizzle; 2225 unsigned orig_idx = op[0].index; 2226 int el; 2227 temp_dst.writemask = WRITEMASK_X; 2228 2229 for (el = 0; el < vector_elements; el++) { 2230 unsigned swz = GET_SWZ(orig_swz, el); 2231 if (swz & 1) 2232 op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z); 2233 else 2234 op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); 2235 if (swz > 2) 2236 op[0].index = orig_idx + 1; 2237 op[0].type = GLSL_TYPE_UINT; 2238 temp_dst.writemask = WRITEMASK_X << el; 2239 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); 2240 } 2241 result_src = temp; 2242 if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u) 2243 result_src.type = GLSL_TYPE_UINT; 2244 else 2245 result_src.type = GLSL_TYPE_INT; 2246 break; 2247 } 2248 case ir_unop_i642b: 2249 emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int64(0)); 2250 break; 2251 case ir_unop_i642f: 2252 emit_asm(ir, TGSI_OPCODE_I642F, result_dst, 
op[0]); 2253 break; 2254 case ir_unop_u642f: 2255 emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]); 2256 break; 2257 case ir_unop_i642d: 2258 emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]); 2259 break; 2260 case ir_unop_u642d: 2261 emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]); 2262 break; 2263 case ir_unop_i2i64: 2264 emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); 2265 break; 2266 case ir_unop_f2i64: 2267 emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]); 2268 break; 2269 case ir_unop_d2i64: 2270 emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]); 2271 break; 2272 case ir_unop_i2u64: 2273 emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); 2274 break; 2275 case ir_unop_f2u64: 2276 emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]); 2277 break; 2278 case ir_unop_d2u64: 2279 emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]); 2280 break; 2281 /* these might be needed */ 2282 case ir_unop_pack_snorm_2x16: 2283 case ir_unop_pack_unorm_2x16: 2284 case ir_unop_pack_snorm_4x8: 2285 case ir_unop_pack_unorm_4x8: 2286 2287 case ir_unop_unpack_snorm_2x16: 2288 case ir_unop_unpack_unorm_2x16: 2289 case ir_unop_unpack_snorm_4x8: 2290 case ir_unop_unpack_unorm_4x8: 2291 2292 case ir_quadop_vector: 2293 case ir_binop_vector_extract: 2294 case ir_triop_vector_insert: 2295 case ir_binop_carry: 2296 case ir_binop_borrow: 2297 case ir_unop_ssbo_unsized_array_length: 2298 /* This operation is not supported, or should have already been handled. 2299 */ 2300 assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); 2301 break; 2302 } 2303 2304 this->result = result_src; 2305 } 2306 2307 2308 void 2309 glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 2310 { 2311 st_src_reg src; 2312 int i; 2313 int swizzle[4]; 2314 2315 /* Note that this is only swizzles in expressions, not those on the left 2316 * hand side of an assignment, which do write masking. See ir_assignment 2317 * for that. 2318 */ 2319 2320 ir->val->accept(this); 2321 src = this->result; 2322 assert(src.file != PROGRAM_UNDEFINED); 2323 assert(ir->type->vector_elements > 0); 2324 2325 for (i = 0; i < 4; i++) { 2326 if (i < ir->type->vector_elements) { 2327 switch (i) { 2328 case 0: 2329 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 2330 break; 2331 case 1: 2332 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 2333 break; 2334 case 2: 2335 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 2336 break; 2337 case 3: 2338 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 2339 break; 2340 } 2341 } else { 2342 /* If the type is smaller than a vec4, replicate the last 2343 * channel out. 2344 */ 2345 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 2346 } 2347 } 2348 2349 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 2350 2351 this->result = src; 2352 } 2353 2354 /* Test if the variable is an array. Note that geometry and 2355 * tessellation shader inputs are outputs are always arrays (except 2356 * for patch inputs), so only the array element type is considered. 
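 * When that implicit per-vertex array dimension is peeled off,
 * *remove_array tells the caller to ignore it when computing sizes.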
 */
static bool
is_inout_array(unsigned stage, ir_variable *var, bool *remove_array)
{
   const glsl_type *type = var->type;

   *remove_array = false;

   if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
       (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
      return false;

   if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
        (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
        stage == MESA_SHADER_TESS_CTRL) &&
       !var->data.patch) {
      if (!var->type->is_array())
         return false; /* a system value probably */

      type = var->type->fields.array;
      *remove_array = true;
   }

   return type->is_array() || type->is_matrix();
}

static unsigned
st_translate_interp_loc(ir_variable *var)
{
   if (var->data.centroid)
      return TGSI_INTERPOLATE_LOC_CENTROID;
   else if (var->data.sample)
      return TGSI_INTERPOLATE_LOC_SAMPLE;
   else
      return TGSI_INTERPOLATE_LOC_CENTER;
}

void
glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
{
   variable_storage *entry = find_variable_storage(ir->var);
   ir_variable *var = ir->var;
   bool remove_array;

   if (!entry) {
      switch (var->data.mode) {
      case ir_var_uniform:
         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
                                               var->data.param_index);
         _mesa_hash_table_insert(this->variables, var, entry);
         break;
      case ir_var_shader_in: {
         /* The linker assigns locations for varyings and attributes,
          * including deprecated builtins (like gl_Color), user-assigned
          * generic attributes (glBindAttribLocation), and
          * user-defined varyings.
2413 */ 2414 assert(var->data.location != -1); 2415 2416 const glsl_type *type_without_array = var->type->without_array(); 2417 struct inout_decl *decl = &inputs[num_inputs]; 2418 unsigned component = var->data.location_frac; 2419 unsigned num_components; 2420 num_inputs++; 2421 2422 if (type_without_array->is_64bit()) 2423 component = component / 2; 2424 if (type_without_array->vector_elements) 2425 num_components = type_without_array->vector_elements; 2426 else 2427 num_components = 4; 2428 2429 decl->mesa_index = var->data.location; 2430 decl->interp = (glsl_interp_mode) var->data.interpolation; 2431 decl->interp_loc = st_translate_interp_loc(var); 2432 decl->base_type = type_without_array->base_type; 2433 decl->usage_mask = u_bit_consecutive(component, num_components); 2434 2435 if (is_inout_array(shader->Stage, var, &remove_array)) { 2436 decl->array_id = num_input_arrays + 1; 2437 num_input_arrays++; 2438 } else { 2439 decl->array_id = 0; 2440 } 2441 2442 if (remove_array) 2443 decl->size = type_size(var->type->fields.array); 2444 else 2445 decl->size = type_size(var->type); 2446 2447 entry = new(mem_ctx) variable_storage(var, 2448 PROGRAM_INPUT, 2449 decl->mesa_index, 2450 decl->array_id); 2451 entry->component = component; 2452 2453 _mesa_hash_table_insert(this->variables, var, entry); 2454 2455 break; 2456 } 2457 case ir_var_shader_out: { 2458 assert(var->data.location != -1); 2459 2460 const glsl_type *type_without_array = var->type->without_array(); 2461 struct inout_decl *decl = &outputs[num_outputs]; 2462 unsigned component = var->data.location_frac; 2463 unsigned num_components; 2464 num_outputs++; 2465 2466 if (type_without_array->is_64bit()) 2467 component = component / 2; 2468 if (type_without_array->vector_elements) 2469 num_components = type_without_array->vector_elements; 2470 else 2471 num_components = 4; 2472 2473 decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index; 2474 decl->base_type = type_without_array->base_type; 2475 decl->usage_mask = u_bit_consecutive(component, num_components); 2476 if (var->data.stream & (1u << 31)) { 2477 decl->gs_out_streams = var->data.stream & ~(1u << 31); 2478 } else { 2479 assert(var->data.stream < 4); 2480 decl->gs_out_streams = 0; 2481 for (unsigned i = 0; i < num_components; ++i) 2482 decl->gs_out_streams |= var->data.stream << (2 * (component + i)); 2483 } 2484 2485 if (is_inout_array(shader->Stage, var, &remove_array)) { 2486 decl->array_id = num_output_arrays + 1; 2487 num_output_arrays++; 2488 } else { 2489 decl->array_id = 0; 2490 } 2491 2492 if (remove_array) 2493 decl->size = type_size(var->type->fields.array); 2494 else 2495 decl->size = type_size(var->type); 2496 2497 if (var->data.fb_fetch_output) { 2498 st_dst_reg dst = st_dst_reg(get_temp(var->type)); 2499 st_src_reg src = st_src_reg(PROGRAM_OUTPUT, decl->mesa_index, 2500 var->type, component, decl->array_id); 2501 emit_asm(NULL, TGSI_OPCODE_FBFETCH, dst, src); 2502 entry = new(mem_ctx) variable_storage(var, dst.file, dst.index, 2503 dst.array_id); 2504 } else { 2505 entry = new(mem_ctx) variable_storage(var, 2506 PROGRAM_OUTPUT, 2507 decl->mesa_index, 2508 decl->array_id); 2509 } 2510 entry->component = component; 2511 2512 _mesa_hash_table_insert(this->variables, var, entry); 2513 2514 break; 2515 } 2516 case ir_var_system_value: 2517 entry = new(mem_ctx) variable_storage(var, 2518 PROGRAM_SYSTEM_VALUE, 2519 var->data.location); 2520 break; 2521 case ir_var_auto: 2522 case ir_var_temporary: 2523 st_src_reg src = get_temp(var->type); 2524 2525 
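      /* Locals and temporaries are backed by the freshly allocated TGSI
       * temporaries returned by get_temp() above. */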
entry = new(mem_ctx) variable_storage(var, src.file, src.index, 2526 src.array_id); 2527 _mesa_hash_table_insert(this->variables, var, entry); 2528 2529 break; 2530 } 2531 2532 if (!entry) { 2533 printf("Failed to make storage for %s\n", var->name); 2534 exit(1); 2535 } 2536 } 2537 2538 this->result = st_src_reg(entry->file, entry->index, var->type, 2539 entry->component, entry->array_id); 2540 if (this->shader->Stage == MESA_SHADER_VERTEX && 2541 var->data.mode == ir_var_shader_in && 2542 var->type->without_array()->is_double()) 2543 this->result.is_double_vertex_input = true; 2544 if (!native_integers) 2545 this->result.type = GLSL_TYPE_FLOAT; 2546 } 2547 2548 static void 2549 shrink_array_declarations(struct inout_decl *decls, unsigned count, 2550 GLbitfield64* usage_mask, 2551 GLbitfield64 double_usage_mask, 2552 GLbitfield* patch_usage_mask) 2553 { 2554 unsigned i; 2555 int j; 2556 2557 /* Fix array declarations by removing unused array elements at both ends 2558 * of the arrays. For example, mat4[3] where only mat[1] is used. 2559 */ 2560 for (i = 0; i < count; i++) { 2561 struct inout_decl *decl = &decls[i]; 2562 if (!decl->array_id) 2563 continue; 2564 2565 /* Shrink the beginning. */ 2566 for (j = 0; j < (int)decl->size; j++) { 2567 if (decl->mesa_index >= VARYING_SLOT_PATCH0) { 2568 if (*patch_usage_mask & 2569 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) 2570 break; 2571 } 2572 else { 2573 if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) 2574 break; 2575 if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) 2576 break; 2577 } 2578 2579 decl->mesa_index++; 2580 decl->size--; 2581 j--; 2582 } 2583 2584 /* Shrink the end. */ 2585 for (j = decl->size-1; j >= 0; j--) { 2586 if (decl->mesa_index >= VARYING_SLOT_PATCH0) { 2587 if (*patch_usage_mask & 2588 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) 2589 break; 2590 } 2591 else { 2592 if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) 2593 break; 2594 if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) 2595 break; 2596 } 2597 2598 decl->size--; 2599 } 2600 2601 /* When not all entries of an array are accessed, we mark them as used 2602 * here anyway, to ensure that the input/output mapping logic doesn't get 2603 * confused. 2604 * 2605 * TODO This happens when an array isn't used via indirect access, which 2606 * some game ports do (at least eON-based). There is an optimization 2607 * opportunity here by replacing the array declaration with non-array 2608 * declarations of those slots that are actually used. 2609 */ 2610 for (j = 1; j < (int)decl->size; ++j) { 2611 if (decl->mesa_index >= VARYING_SLOT_PATCH0) 2612 *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j); 2613 else 2614 *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j); 2615 } 2616 } 2617 } 2618 2619 void 2620 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 2621 { 2622 ir_constant *index; 2623 st_src_reg src; 2624 bool is_2D = false; 2625 ir_variable *var = ir->variable_referenced(); 2626 2627 /* We only need the logic provided by st_glsl_storage_type_size() 2628 * for arrays of structs. Indirect sampler and image indexing is handled 2629 * elsewhere. 2630 */ 2631 int element_size = ir->type->without_array()->is_record() ? 
2632 st_glsl_storage_type_size(ir->type, var->data.bindless) : 2633 type_size(ir->type); 2634 2635 index = ir->array_index->constant_expression_value(ralloc_parent(ir)); 2636 2637 ir->array->accept(this); 2638 src = this->result; 2639 2640 if (!src.has_index2) { 2641 switch (this->prog->Target) { 2642 case GL_TESS_CONTROL_PROGRAM_NV: 2643 is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) && 2644 !ir->variable_referenced()->data.patch; 2645 break; 2646 case GL_TESS_EVALUATION_PROGRAM_NV: 2647 is_2D = src.file == PROGRAM_INPUT && 2648 !ir->variable_referenced()->data.patch; 2649 break; 2650 case GL_GEOMETRY_PROGRAM_NV: 2651 is_2D = src.file == PROGRAM_INPUT; 2652 break; 2653 } 2654 } 2655 2656 if (is_2D) 2657 element_size = 1; 2658 2659 if (index) { 2660 2661 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && 2662 src.file == PROGRAM_INPUT) 2663 element_size = attrib_type_size(ir->type, true); 2664 if (is_2D) { 2665 src.index2D = index->value.i[0]; 2666 src.has_index2 = true; 2667 } else 2668 src.index += index->value.i[0] * element_size; 2669 } else { 2670 /* Variable index array dereference. It eats the "vec4" of the 2671 * base of the array and an index that offsets the TGSI register 2672 * index. 2673 */ 2674 ir->array_index->accept(this); 2675 2676 st_src_reg index_reg; 2677 2678 if (element_size == 1) { 2679 index_reg = this->result; 2680 } else { 2681 index_reg = get_temp(native_integers ? 2682 glsl_type::int_type : glsl_type::float_type); 2683 2684 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 2685 this->result, st_src_reg_for_type(index_reg.type, element_size)); 2686 } 2687 2688 /* If there was already a relative address register involved, add the 2689 * new and the old together to get the new offset. 2690 */ 2691 if (!is_2D && src.reladdr != NULL) { 2692 st_src_reg accum_reg = get_temp(native_integers ? 2693 glsl_type::int_type : glsl_type::float_type); 2694 2695 emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 2696 index_reg, *src.reladdr); 2697 2698 index_reg = accum_reg; 2699 } 2700 2701 if (is_2D) { 2702 src.reladdr2 = ralloc(mem_ctx, st_src_reg); 2703 memcpy(src.reladdr2, &index_reg, sizeof(index_reg)); 2704 src.index2D = 0; 2705 src.has_index2 = true; 2706 } else { 2707 src.reladdr = ralloc(mem_ctx, st_src_reg); 2708 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 2709 } 2710 } 2711 2712 /* Change the register type to the element type of the array. */ 2713 src.type = ir->type->base_type; 2714 2715 this->result = src; 2716 } 2717 2718 void 2719 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 2720 { 2721 unsigned int i; 2722 const glsl_type *struct_type = ir->record->type; 2723 ir_variable *var = ir->record->variable_referenced(); 2724 int offset = 0; 2725 2726 ir->record->accept(this); 2727 2728 assert(ir->field_idx >= 0); 2729 assert(var); 2730 for (i = 0; i < struct_type->length; i++) { 2731 if (i == (unsigned) ir->field_idx) 2732 break; 2733 const glsl_type *member_type = struct_type->fields.structure[i].type; 2734 offset += st_glsl_storage_type_size(member_type, var->data.bindless); 2735 } 2736 2737 /* If the type is smaller than a vec4, replicate the last channel out. 
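    * swizzle_for_size() builds exactly that replicated swizzle,
    * e.g. XYZZ for a three-component result.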
    */
   if (ir->type->is_scalar() || ir->type->is_vector())
      this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      this->result.swizzle = SWIZZLE_NOOP;

   this->result.index += offset;
   this->result.type = ir->type->base_type;
}

/**
 * We want to be careful in assignment setup to hit the actual storage
 * instead of potentially using a temporary like we might with the
 * ir_dereference handler.
 */
static st_dst_reg
get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component)
{
   /* The LHS must be a dereference.  If the LHS is a variable indexed array
    * access of a vector, it must be separated into a series of conditional
    * moves before reaching this point (see ir_vec_index_to_cond_assign).
    */
   assert(ir->as_dereference());
   ir_dereference_array *deref_array = ir->as_dereference_array();
   if (deref_array) {
      assert(!deref_array->array->type->is_vector());
   }

   /* Use the rvalue deref handler for the most part.  We write swizzles using
    * the writemask, but we do extract the base component for enhanced layouts
    * from the source swizzle.
    */
   ir->accept(v);
   *component = GET_SWZ(v->result.swizzle, 0);
   return st_dst_reg(v->result);
}

/**
 * Process the condition of a conditional assignment
 *
 * Examines the condition of a conditional assignment to generate the optimal
 * first operand of a \c CMP instruction.  If the condition is a relational
 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
 * used as the source for the \c CMP instruction.  Otherwise the comparison
 * is processed to a boolean result, and the boolean result is used as the
 * operand to the CMP instruction.
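 * The return value tells the caller whether the two operands of the CMP
 * move should be swapped because the sense of the comparison was inverted.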
2783 */ 2784 bool 2785 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 2786 { 2787 ir_rvalue *src_ir = ir; 2788 bool negate = true; 2789 bool switch_order = false; 2790 2791 ir_expression *const expr = ir->as_expression(); 2792 2793 if (native_integers) { 2794 if ((expr != NULL) && (expr->num_operands == 2)) { 2795 enum glsl_base_type type = expr->operands[0]->type->base_type; 2796 if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT || 2797 type == GLSL_TYPE_BOOL) { 2798 if (expr->operation == ir_binop_equal) { 2799 if (expr->operands[0]->is_zero()) { 2800 src_ir = expr->operands[1]; 2801 switch_order = true; 2802 } 2803 else if (expr->operands[1]->is_zero()) { 2804 src_ir = expr->operands[0]; 2805 switch_order = true; 2806 } 2807 } 2808 else if (expr->operation == ir_binop_nequal) { 2809 if (expr->operands[0]->is_zero()) { 2810 src_ir = expr->operands[1]; 2811 } 2812 else if (expr->operands[1]->is_zero()) { 2813 src_ir = expr->operands[0]; 2814 } 2815 } 2816 } 2817 } 2818 2819 src_ir->accept(this); 2820 return switch_order; 2821 } 2822 2823 if ((expr != NULL) && (expr->num_operands == 2)) { 2824 bool zero_on_left = false; 2825 2826 if (expr->operands[0]->is_zero()) { 2827 src_ir = expr->operands[1]; 2828 zero_on_left = true; 2829 } else if (expr->operands[1]->is_zero()) { 2830 src_ir = expr->operands[0]; 2831 zero_on_left = false; 2832 } 2833 2834 /* a is - 0 + - 0 + 2835 * (a < 0) T F F ( a < 0) T F F 2836 * (0 < a) F F T (-a < 0) F F T 2837 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 2838 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 2839 * 2840 * Note that exchanging the order of 0 and 'a' in the comparison simply 2841 * means that the value of 'a' should be negated. 2842 */ 2843 if (src_ir != ir) { 2844 switch (expr->operation) { 2845 case ir_binop_less: 2846 switch_order = false; 2847 negate = zero_on_left; 2848 break; 2849 2850 case ir_binop_gequal: 2851 switch_order = true; 2852 negate = zero_on_left; 2853 break; 2854 2855 default: 2856 /* This isn't the right kind of comparison afterall, so make sure 2857 * the whole condition is visited. 2858 */ 2859 src_ir = ir; 2860 break; 2861 } 2862 } 2863 } 2864 2865 src_ir->accept(this); 2866 2867 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 2868 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 2869 * choose which value TGSI_OPCODE_CMP produces without an extra instruction 2870 * computing the condition. 2871 */ 2872 if (negate) 2873 this->result.negate = ~this->result.negate; 2874 2875 return switch_order; 2876 } 2877 2878 void 2879 glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type, 2880 st_dst_reg *l, st_src_reg *r, 2881 st_src_reg *cond, bool cond_swap) 2882 { 2883 if (type->is_record()) { 2884 for (unsigned int i = 0; i < type->length; i++) { 2885 emit_block_mov(ir, type->fields.structure[i].type, l, r, 2886 cond, cond_swap); 2887 } 2888 return; 2889 } 2890 2891 if (type->is_array()) { 2892 for (unsigned int i = 0; i < type->length; i++) { 2893 emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap); 2894 } 2895 return; 2896 } 2897 2898 if (type->is_matrix()) { 2899 const struct glsl_type *vec_type; 2900 2901 vec_type = glsl_type::get_instance(type->is_double() ? 
GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT, 2902 type->vector_elements, 1); 2903 2904 for (int i = 0; i < type->matrix_columns; i++) { 2905 emit_block_mov(ir, vec_type, l, r, cond, cond_swap); 2906 } 2907 return; 2908 } 2909 2910 assert(type->is_scalar() || type->is_vector()); 2911 2912 l->type = type->base_type; 2913 r->type = type->base_type; 2914 if (cond) { 2915 st_src_reg l_src = st_src_reg(*l); 2916 2917 if (l_src.file == PROGRAM_OUTPUT && 2918 this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 2919 (l_src.index == FRAG_RESULT_DEPTH || l_src.index == FRAG_RESULT_STENCIL)) { 2920 /* This is a special case because the source swizzles will be shifted 2921 * later to account for the difference between GLSL (where they're 2922 * plain floats) and TGSI (where they're Z and Y components). */ 2923 l_src.swizzle = SWIZZLE_XXXX; 2924 } 2925 2926 if (native_integers) { 2927 emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond, 2928 cond_swap ? l_src : *r, 2929 cond_swap ? *r : l_src); 2930 } else { 2931 emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond, 2932 cond_swap ? l_src : *r, 2933 cond_swap ? *r : l_src); 2934 } 2935 } else { 2936 emit_asm(ir, TGSI_OPCODE_MOV, *l, *r); 2937 } 2938 l->index++; 2939 r->index++; 2940 if (type->is_dual_slot()) { 2941 l->index++; 2942 if (r->is_double_vertex_input == false) 2943 r->index++; 2944 } 2945 } 2946 2947 void 2948 glsl_to_tgsi_visitor::visit(ir_assignment *ir) 2949 { 2950 int dst_component; 2951 st_dst_reg l; 2952 st_src_reg r; 2953 2954 /* all generated instructions need to be flaged as precise */ 2955 this->precise = is_precise(ir->lhs->variable_referenced()); 2956 ir->rhs->accept(this); 2957 r = this->result; 2958 2959 l = get_assignment_lhs(ir->lhs, this, &dst_component); 2960 2961 { 2962 int swizzles[4]; 2963 int first_enabled_chan = 0; 2964 int rhs_chan = 0; 2965 ir_variable *variable = ir->lhs->variable_referenced(); 2966 2967 if (shader->Stage == MESA_SHADER_FRAGMENT && 2968 variable->data.mode == ir_var_shader_out && 2969 (variable->data.location == FRAG_RESULT_DEPTH || 2970 variable->data.location == FRAG_RESULT_STENCIL)) { 2971 assert(ir->lhs->type->is_scalar()); 2972 assert(ir->write_mask == WRITEMASK_X); 2973 2974 if (variable->data.location == FRAG_RESULT_DEPTH) 2975 l.writemask = WRITEMASK_Z; 2976 else { 2977 assert(variable->data.location == FRAG_RESULT_STENCIL); 2978 l.writemask = WRITEMASK_Y; 2979 } 2980 } else if (ir->write_mask == 0) { 2981 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 2982 2983 unsigned num_elements = ir->lhs->type->without_array()->vector_elements; 2984 2985 if (num_elements) { 2986 l.writemask = u_bit_consecutive(0, num_elements); 2987 } else { 2988 /* The type is a struct or an array of (array of) structs. */ 2989 l.writemask = WRITEMASK_XYZW; 2990 } 2991 } else { 2992 l.writemask = ir->write_mask; 2993 } 2994 2995 for (int i = 0; i < 4; i++) { 2996 if (l.writemask & (1 << i)) { 2997 first_enabled_chan = GET_SWZ(r.swizzle, i); 2998 break; 2999 } 3000 } 3001 3002 l.writemask = l.writemask << dst_component; 3003 3004 /* Swizzle a small RHS vector into the channels being written. 3005 * 3006 * glsl ir treats write_mask as dictating how many channels are 3007 * present on the RHS while TGSI treats write_mask as just 3008 * showing which channels of the vec4 RHS get written. 
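    * For example, a write mask of .yz takes the first two RHS channels and
    * routes them into the y and z swizzle slots here.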
3009 */ 3010 for (int i = 0; i < 4; i++) { 3011 if (l.writemask & (1 << i)) 3012 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 3013 else 3014 swizzles[i] = first_enabled_chan; 3015 } 3016 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 3017 swizzles[2], swizzles[3]); 3018 } 3019 3020 assert(l.file != PROGRAM_UNDEFINED); 3021 assert(r.file != PROGRAM_UNDEFINED); 3022 3023 if (ir->condition) { 3024 const bool switch_order = this->process_move_condition(ir->condition); 3025 st_src_reg condition = this->result; 3026 3027 emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order); 3028 } else if (ir->rhs->as_expression() && 3029 this->instructions.get_tail() && 3030 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 3031 !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded && 3032 type_size(ir->lhs->type) == 1 && 3033 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) { 3034 /* To avoid emitting an extra MOV when assigning an expression to a 3035 * variable, emit the last instruction of the expression again, but 3036 * replace the destination register with the target of the assignment. 3037 * Dead code elimination will remove the original instruction. 3038 */ 3039 glsl_to_tgsi_instruction *inst, *new_inst; 3040 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 3041 new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]); 3042 new_inst->saturate = inst->saturate; 3043 new_inst->resource = inst->resource; 3044 inst->dead_mask = inst->dst[0].writemask; 3045 } else { 3046 emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false); 3047 } 3048 this->precise = 0; 3049 } 3050 3051 3052 void 3053 glsl_to_tgsi_visitor::visit(ir_constant *ir) 3054 { 3055 st_src_reg src; 3056 GLdouble stack_vals[4] = { 0 }; 3057 gl_constant_value *values = (gl_constant_value *) stack_vals; 3058 GLenum gl_type = GL_NONE; 3059 unsigned int i; 3060 static int in_array = 0; 3061 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 3062 3063 /* Unfortunately, 4 floats is all we can get into 3064 * _mesa_add_typed_unnamed_constant. So, make a temp to store an 3065 * aggregate constant and move each constant value into it. If we 3066 * get lucky, copy propagation will eliminate the extra moves. 
3067 */ 3068 if (ir->type->is_record()) { 3069 st_src_reg temp_base = get_temp(ir->type); 3070 st_dst_reg temp = st_dst_reg(temp_base); 3071 3072 for (i = 0; i < ir->type->length; i++) { 3073 ir_constant *const field_value = ir->get_record_field(i); 3074 int size = type_size(field_value->type); 3075 3076 assert(size > 0); 3077 3078 field_value->accept(this); 3079 src = this->result; 3080 3081 for (unsigned j = 0; j < (unsigned int)size; j++) { 3082 emit_asm(ir, TGSI_OPCODE_MOV, temp, src); 3083 3084 src.index++; 3085 temp.index++; 3086 } 3087 } 3088 this->result = temp_base; 3089 return; 3090 } 3091 3092 if (ir->type->is_array()) { 3093 st_src_reg temp_base = get_temp(ir->type); 3094 st_dst_reg temp = st_dst_reg(temp_base); 3095 int size = type_size(ir->type->fields.array); 3096 3097 assert(size > 0); 3098 in_array++; 3099 3100 for (i = 0; i < ir->type->length; i++) { 3101 ir->const_elements[i]->accept(this); 3102 src = this->result; 3103 for (int j = 0; j < size; j++) { 3104 emit_asm(ir, TGSI_OPCODE_MOV, temp, src); 3105 3106 src.index++; 3107 temp.index++; 3108 } 3109 } 3110 this->result = temp_base; 3111 in_array--; 3112 return; 3113 } 3114 3115 if (ir->type->is_matrix()) { 3116 st_src_reg mat = get_temp(ir->type); 3117 st_dst_reg mat_column = st_dst_reg(mat); 3118 3119 for (i = 0; i < ir->type->matrix_columns; i++) { 3120 switch (ir->type->base_type) { 3121 case GLSL_TYPE_FLOAT: 3122 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements]; 3123 3124 src = st_src_reg(file, -1, ir->type->base_type); 3125 src.index = add_constant(file, 3126 values, 3127 ir->type->vector_elements, 3128 GL_FLOAT, 3129 &src.swizzle); 3130 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3131 break; 3132 case GLSL_TYPE_DOUBLE: 3133 values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements]; 3134 src = st_src_reg(file, -1, ir->type->base_type); 3135 src.index = add_constant(file, 3136 values, 3137 ir->type->vector_elements, 3138 GL_DOUBLE, 3139 &src.swizzle); 3140 if (ir->type->vector_elements >= 2) { 3141 mat_column.writemask = WRITEMASK_XY; 3142 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); 3143 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3144 } else { 3145 mat_column.writemask = WRITEMASK_X; 3146 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X); 3147 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3148 } 3149 src.index++; 3150 if (ir->type->vector_elements > 2) { 3151 if (ir->type->vector_elements == 4) { 3152 mat_column.writemask = WRITEMASK_ZW; 3153 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); 3154 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3155 } else { 3156 mat_column.writemask = WRITEMASK_Z; 3157 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y); 3158 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3159 mat_column.writemask = WRITEMASK_XYZW; 3160 src.swizzle = SWIZZLE_XYZW; 3161 } 3162 mat_column.index++; 3163 } 3164 break; 3165 default: 3166 unreachable("Illegal matrix constant type.\n"); 3167 break; 3168 } 3169 mat_column.index++; 3170 } 3171 this->result = mat; 3172 return; 3173 } 3174 3175 switch (ir->type->base_type) { 3176 case GLSL_TYPE_FLOAT: 3177 gl_type = GL_FLOAT; 3178 for (i = 0; i < ir->type->vector_elements; i++) { 3179 values[i].f = ir->value.f[i]; 3180 } 3181 break; 3182 case GLSL_TYPE_DOUBLE: 3183 gl_type = GL_DOUBLE; 3184 for (i = 0; i < ir->type->vector_elements; i++) { 3185 memcpy(&values[i * 2], &ir->value.d[i], sizeof(double)); 3186 } 3187 
break; 3188 case GLSL_TYPE_INT64: 3189 gl_type = GL_INT64_ARB; 3190 for (i = 0; i < ir->type->vector_elements; i++) { 3191 memcpy(&values[i * 2], &ir->value.d[i], sizeof(int64_t)); 3192 } 3193 break; 3194 case GLSL_TYPE_UINT64: 3195 gl_type = GL_UNSIGNED_INT64_ARB; 3196 for (i = 0; i < ir->type->vector_elements; i++) { 3197 memcpy(&values[i * 2], &ir->value.d[i], sizeof(uint64_t)); 3198 } 3199 break; 3200 case GLSL_TYPE_UINT: 3201 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 3202 for (i = 0; i < ir->type->vector_elements; i++) { 3203 if (native_integers) 3204 values[i].u = ir->value.u[i]; 3205 else 3206 values[i].f = ir->value.u[i]; 3207 } 3208 break; 3209 case GLSL_TYPE_INT: 3210 gl_type = native_integers ? GL_INT : GL_FLOAT; 3211 for (i = 0; i < ir->type->vector_elements; i++) { 3212 if (native_integers) 3213 values[i].i = ir->value.i[i]; 3214 else 3215 values[i].f = ir->value.i[i]; 3216 } 3217 break; 3218 case GLSL_TYPE_BOOL: 3219 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 3220 for (i = 0; i < ir->type->vector_elements; i++) { 3221 values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0; 3222 } 3223 break; 3224 default: 3225 assert(!"Non-float/uint/int/bool constant"); 3226 } 3227 3228 this->result = st_src_reg(file, -1, ir->type); 3229 this->result.index = add_constant(file, 3230 values, 3231 ir->type->vector_elements, 3232 gl_type, 3233 &this->result.swizzle); 3234 } 3235 3236 void 3237 glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) 3238 { 3239 exec_node *param = ir->actual_parameters.get_head(); 3240 ir_dereference *deref = static_cast<ir_dereference *>(param); 3241 ir_variable *location = deref->variable_referenced(); 3242 bool has_hw_atomics = st_context(ctx)->has_hw_atomics; 3243 /* Calculate the surface offset */ 3244 st_src_reg offset; 3245 unsigned array_size = 0, base = 0; 3246 uint16_t index = 0; 3247 st_src_reg resource; 3248 3249 get_deref_offsets(deref, &array_size, &base, &index, &offset, false); 3250 3251 if (has_hw_atomics) { 3252 variable_storage *entry = find_variable_storage(location); 3253 st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, location->data.binding); 3254 3255 if (!entry) { 3256 entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC, 3257 num_atomics); 3258 _mesa_hash_table_insert(this->variables, location, entry); 3259 3260 atomic_info[num_atomics].location = location->data.location; 3261 atomic_info[num_atomics].binding = location->data.binding; 3262 atomic_info[num_atomics].size = location->type->arrays_of_arrays_size(); 3263 if (atomic_info[num_atomics].size == 0) 3264 atomic_info[num_atomics].size = 1; 3265 atomic_info[num_atomics].array_id = 0; 3266 num_atomics++; 3267 } 3268 3269 if (offset.file != PROGRAM_UNDEFINED) { 3270 if (atomic_info[entry->index].array_id == 0) { 3271 num_atomic_arrays++; 3272 atomic_info[entry->index].array_id = num_atomic_arrays; 3273 } 3274 buffer.array_id = atomic_info[entry->index].array_id; 3275 } 3276 3277 buffer.index = index; 3278 buffer.index += location->data.offset / ATOMIC_COUNTER_SIZE; 3279 buffer.has_index2 = true; 3280 3281 if (offset.file != PROGRAM_UNDEFINED) { 3282 buffer.reladdr = ralloc(mem_ctx, st_src_reg); 3283 *buffer.reladdr = offset; 3284 emit_arl(ir, sampler_reladdr, offset); 3285 } 3286 offset = st_src_reg_for_int(0); 3287 3288 resource = buffer; 3289 } else { 3290 st_src_reg buffer(PROGRAM_BUFFER, location->data.binding, 3291 GLSL_TYPE_ATOMIC_UINT); 3292 3293 if (offset.file != PROGRAM_UNDEFINED) { 3294 emit_asm(ir, 
TGSI_OPCODE_MUL, st_dst_reg(offset), 3295 offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); 3296 emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), 3297 offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE)); 3298 } else { 3299 offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE); 3300 } 3301 resource = buffer; 3302 } 3303 3304 ir->return_deref->accept(this); 3305 st_dst_reg dst(this->result); 3306 dst.writemask = WRITEMASK_X; 3307 3308 glsl_to_tgsi_instruction *inst; 3309 3310 if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_read) { 3311 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset); 3312 } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_increment) { 3313 inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, 3314 st_src_reg_for_int(1)); 3315 } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_predecrement) { 3316 inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, 3317 st_src_reg_for_int(-1)); 3318 emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1)); 3319 } else { 3320 param = param->get_next(); 3321 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3322 val->accept(this); 3323 3324 st_src_reg data = this->result, data2 = undef_src; 3325 unsigned opcode; 3326 switch (ir->callee->intrinsic_id) { 3327 case ir_intrinsic_atomic_counter_add: 3328 opcode = TGSI_OPCODE_ATOMUADD; 3329 break; 3330 case ir_intrinsic_atomic_counter_min: 3331 opcode = TGSI_OPCODE_ATOMIMIN; 3332 break; 3333 case ir_intrinsic_atomic_counter_max: 3334 opcode = TGSI_OPCODE_ATOMIMAX; 3335 break; 3336 case ir_intrinsic_atomic_counter_and: 3337 opcode = TGSI_OPCODE_ATOMAND; 3338 break; 3339 case ir_intrinsic_atomic_counter_or: 3340 opcode = TGSI_OPCODE_ATOMOR; 3341 break; 3342 case ir_intrinsic_atomic_counter_xor: 3343 opcode = TGSI_OPCODE_ATOMXOR; 3344 break; 3345 case ir_intrinsic_atomic_counter_exchange: 3346 opcode = TGSI_OPCODE_ATOMXCHG; 3347 break; 3348 case ir_intrinsic_atomic_counter_comp_swap: { 3349 opcode = TGSI_OPCODE_ATOMCAS; 3350 param = param->get_next(); 3351 val = ((ir_instruction *)param)->as_rvalue(); 3352 val->accept(this); 3353 data2 = this->result; 3354 break; 3355 } 3356 default: 3357 assert(!"Unexpected intrinsic"); 3358 return; 3359 } 3360 3361 inst = emit_asm(ir, opcode, dst, offset, data, data2); 3362 } 3363 3364 inst->resource = resource; 3365 } 3366 3367 void 3368 glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) 3369 { 3370 exec_node *param = ir->actual_parameters.get_head(); 3371 3372 ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); 3373 3374 param = param->get_next(); 3375 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 3376 3377 ir_constant *const_block = block->as_constant(); 3378 int buf_base = st_context(ctx)->has_hw_atomics ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers; 3379 st_src_reg buffer( 3380 PROGRAM_BUFFER, 3381 buf_base + (const_block ? 
const_block->value.u[0] : 0), 3382 GLSL_TYPE_UINT); 3383 3384 if (!const_block) { 3385 block->accept(this); 3386 buffer.reladdr = ralloc(mem_ctx, st_src_reg); 3387 *buffer.reladdr = this->result; 3388 emit_arl(ir, sampler_reladdr, this->result); 3389 } 3390 3391 /* Calculate the surface offset */ 3392 offset->accept(this); 3393 st_src_reg off = this->result; 3394 3395 st_dst_reg dst = undef_dst; 3396 if (ir->return_deref) { 3397 ir->return_deref->accept(this); 3398 dst = st_dst_reg(this->result); 3399 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; 3400 } 3401 3402 glsl_to_tgsi_instruction *inst; 3403 3404 if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) { 3405 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); 3406 if (dst.type == GLSL_TYPE_BOOL) 3407 emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0)); 3408 } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) { 3409 param = param->get_next(); 3410 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3411 val->accept(this); 3412 3413 param = param->get_next(); 3414 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 3415 assert(write_mask); 3416 dst.writemask = write_mask->value.u[0]; 3417 3418 dst.type = this->result.type; 3419 inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); 3420 } else { 3421 param = param->get_next(); 3422 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3423 val->accept(this); 3424 3425 st_src_reg data = this->result, data2 = undef_src; 3426 unsigned opcode; 3427 switch (ir->callee->intrinsic_id) { 3428 case ir_intrinsic_ssbo_atomic_add: 3429 opcode = TGSI_OPCODE_ATOMUADD; 3430 break; 3431 case ir_intrinsic_ssbo_atomic_min: 3432 opcode = TGSI_OPCODE_ATOMIMIN; 3433 break; 3434 case ir_intrinsic_ssbo_atomic_max: 3435 opcode = TGSI_OPCODE_ATOMIMAX; 3436 break; 3437 case ir_intrinsic_ssbo_atomic_and: 3438 opcode = TGSI_OPCODE_ATOMAND; 3439 break; 3440 case ir_intrinsic_ssbo_atomic_or: 3441 opcode = TGSI_OPCODE_ATOMOR; 3442 break; 3443 case ir_intrinsic_ssbo_atomic_xor: 3444 opcode = TGSI_OPCODE_ATOMXOR; 3445 break; 3446 case ir_intrinsic_ssbo_atomic_exchange: 3447 opcode = TGSI_OPCODE_ATOMXCHG; 3448 break; 3449 case ir_intrinsic_ssbo_atomic_comp_swap: 3450 opcode = TGSI_OPCODE_ATOMCAS; 3451 param = param->get_next(); 3452 val = ((ir_instruction *)param)->as_rvalue(); 3453 val->accept(this); 3454 data2 = this->result; 3455 break; 3456 default: 3457 assert(!"Unexpected intrinsic"); 3458 return; 3459 } 3460 3461 inst = emit_asm(ir, opcode, dst, off, data, data2); 3462 } 3463 3464 param = param->get_next(); 3465 ir_constant *access = NULL; 3466 if (!param->is_tail_sentinel()) { 3467 access = ((ir_instruction *)param)->as_constant(); 3468 assert(access); 3469 } 3470 3471 add_buffer_to_load_and_stores(inst, &buffer, &this->instructions, access); 3472 } 3473 3474 void 3475 glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) 3476 { 3477 switch (ir->callee->intrinsic_id) { 3478 case ir_intrinsic_memory_barrier: 3479 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3480 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | 3481 TGSI_MEMBAR_ATOMIC_BUFFER | 3482 TGSI_MEMBAR_SHADER_IMAGE | 3483 TGSI_MEMBAR_SHARED)); 3484 break; 3485 case ir_intrinsic_memory_barrier_atomic_counter: 3486 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3487 st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER)); 3488 break; 3489 case ir_intrinsic_memory_barrier_buffer: 3490 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3491 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER)); 
3492 break; 3493 case ir_intrinsic_memory_barrier_image: 3494 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3495 st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE)); 3496 break; 3497 case ir_intrinsic_memory_barrier_shared: 3498 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3499 st_src_reg_for_int(TGSI_MEMBAR_SHARED)); 3500 break; 3501 case ir_intrinsic_group_memory_barrier: 3502 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3503 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | 3504 TGSI_MEMBAR_ATOMIC_BUFFER | 3505 TGSI_MEMBAR_SHADER_IMAGE | 3506 TGSI_MEMBAR_SHARED | 3507 TGSI_MEMBAR_THREAD_GROUP)); 3508 break; 3509 default: 3510 assert(!"Unexpected memory barrier intrinsic"); 3511 } 3512 } 3513 3514 void 3515 glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) 3516 { 3517 exec_node *param = ir->actual_parameters.get_head(); 3518 3519 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 3520 3521 st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT); 3522 3523 /* Calculate the surface offset */ 3524 offset->accept(this); 3525 st_src_reg off = this->result; 3526 3527 st_dst_reg dst = undef_dst; 3528 if (ir->return_deref) { 3529 ir->return_deref->accept(this); 3530 dst = st_dst_reg(this->result); 3531 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; 3532 } 3533 3534 glsl_to_tgsi_instruction *inst; 3535 3536 if (ir->callee->intrinsic_id == ir_intrinsic_shared_load) { 3537 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); 3538 inst->resource = buffer; 3539 } else if (ir->callee->intrinsic_id == ir_intrinsic_shared_store) { 3540 param = param->get_next(); 3541 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3542 val->accept(this); 3543 3544 param = param->get_next(); 3545 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 3546 assert(write_mask); 3547 dst.writemask = write_mask->value.u[0]; 3548 3549 dst.type = this->result.type; 3550 inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); 3551 inst->resource = buffer; 3552 } else { 3553 param = param->get_next(); 3554 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3555 val->accept(this); 3556 3557 st_src_reg data = this->result, data2 = undef_src; 3558 unsigned opcode; 3559 switch (ir->callee->intrinsic_id) { 3560 case ir_intrinsic_shared_atomic_add: 3561 opcode = TGSI_OPCODE_ATOMUADD; 3562 break; 3563 case ir_intrinsic_shared_atomic_min: 3564 opcode = TGSI_OPCODE_ATOMIMIN; 3565 break; 3566 case ir_intrinsic_shared_atomic_max: 3567 opcode = TGSI_OPCODE_ATOMIMAX; 3568 break; 3569 case ir_intrinsic_shared_atomic_and: 3570 opcode = TGSI_OPCODE_ATOMAND; 3571 break; 3572 case ir_intrinsic_shared_atomic_or: 3573 opcode = TGSI_OPCODE_ATOMOR; 3574 break; 3575 case ir_intrinsic_shared_atomic_xor: 3576 opcode = TGSI_OPCODE_ATOMXOR; 3577 break; 3578 case ir_intrinsic_shared_atomic_exchange: 3579 opcode = TGSI_OPCODE_ATOMXCHG; 3580 break; 3581 case ir_intrinsic_shared_atomic_comp_swap: 3582 opcode = TGSI_OPCODE_ATOMCAS; 3583 param = param->get_next(); 3584 val = ((ir_instruction *)param)->as_rvalue(); 3585 val->accept(this); 3586 data2 = this->result; 3587 break; 3588 default: 3589 assert(!"Unexpected intrinsic"); 3590 return; 3591 } 3592 3593 inst = emit_asm(ir, opcode, dst, off, data, data2); 3594 inst->resource = buffer; 3595 } 3596 } 3597 3598 static void 3599 get_image_qualifiers(ir_dereference *ir, const glsl_type **type, 3600 bool *memory_coherent, bool *memory_volatile, 3601 bool *memory_restrict, unsigned *image_format) 3602 { 3603 3604 switch (ir->ir_type) { 3605 case 
ir_type_dereference_record: { 3606 ir_dereference_record *deref_record = ir->as_dereference_record(); 3607 const glsl_type *struct_type = deref_record->record->type; 3608 int field_idx = deref_record->field_idx; 3609 3610 *type = struct_type->fields.structure[field_idx].type->without_array(); 3611 *memory_coherent = 3612 struct_type->fields.structure[field_idx].memory_coherent; 3613 *memory_volatile = 3614 struct_type->fields.structure[field_idx].memory_volatile; 3615 *memory_restrict = 3616 struct_type->fields.structure[field_idx].memory_restrict; 3617 *image_format = 3618 struct_type->fields.structure[field_idx].image_format; 3619 break; 3620 } 3621 3622 case ir_type_dereference_array: { 3623 ir_dereference_array *deref_arr = ir->as_dereference_array(); 3624 get_image_qualifiers((ir_dereference *)deref_arr->array, type, 3625 memory_coherent, memory_volatile, memory_restrict, 3626 image_format); 3627 break; 3628 } 3629 3630 case ir_type_dereference_variable: { 3631 ir_variable *var = ir->variable_referenced(); 3632 3633 *type = var->type->without_array(); 3634 *memory_coherent = var->data.memory_coherent; 3635 *memory_volatile = var->data.memory_volatile; 3636 *memory_restrict = var->data.memory_restrict; 3637 *image_format = var->data.image_format; 3638 break; 3639 } 3640 3641 default: 3642 break; 3643 } 3644 } 3645 3646 void 3647 glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) 3648 { 3649 exec_node *param = ir->actual_parameters.get_head(); 3650 3651 ir_dereference *img = (ir_dereference *)param; 3652 const ir_variable *imgvar = img->variable_referenced(); 3653 unsigned sampler_array_size = 1, sampler_base = 0; 3654 bool memory_coherent = false, memory_volatile = false, memory_restrict = false; 3655 unsigned image_format = 0; 3656 const glsl_type *type = NULL; 3657 3658 get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile, 3659 &memory_restrict, &image_format); 3660 3661 st_src_reg reladdr; 3662 st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); 3663 uint16_t index = 0; 3664 get_deref_offsets(img, &sampler_array_size, &sampler_base, 3665 &index, &reladdr, !imgvar->contains_bindless()); 3666 3667 image.index = index; 3668 if (reladdr.file != PROGRAM_UNDEFINED) { 3669 image.reladdr = ralloc(mem_ctx, st_src_reg); 3670 *image.reladdr = reladdr; 3671 emit_arl(ir, sampler_reladdr, reladdr); 3672 } 3673 3674 st_dst_reg dst = undef_dst; 3675 if (ir->return_deref) { 3676 ir->return_deref->accept(this); 3677 dst = st_dst_reg(this->result); 3678 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; 3679 } 3680 3681 glsl_to_tgsi_instruction *inst; 3682 3683 st_src_reg bindless; 3684 if (imgvar->contains_bindless()) { 3685 img->accept(this); 3686 bindless = this->result; 3687 } 3688 3689 if (ir->callee->intrinsic_id == ir_intrinsic_image_size) { 3690 dst.writemask = WRITEMASK_XYZ; 3691 inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst); 3692 } else if (ir->callee->intrinsic_id == ir_intrinsic_image_samples) { 3693 st_src_reg res = get_temp(glsl_type::ivec4_type); 3694 st_dst_reg dstres = st_dst_reg(res); 3695 dstres.writemask = WRITEMASK_W; 3696 inst = emit_asm(ir, TGSI_OPCODE_RESQ, dstres); 3697 res.swizzle = SWIZZLE_WWWW; 3698 emit_asm(ir, TGSI_OPCODE_MOV, dst, res); 3699 } else { 3700 st_src_reg arg1 = undef_src, arg2 = undef_src; 3701 st_src_reg coord; 3702 st_dst_reg coord_dst; 3703 coord = get_temp(glsl_type::ivec4_type); 3704 coord_dst = st_dst_reg(coord); 3705 coord_dst.writemask = (1 << type->coordinate_components()) - 1; 3706 param = param->get_next(); 3707
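/* Fetch the texel coordinate into the ivec4 temporary and build a
 * swizzle that replicates .x into the unused channels: a 2D image ends
 * up with an xyxx swizzle, a 3D or array image with xyzx. For
 * multisampled images the sample index is packed into .w below.
 */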
((ir_dereference *)param)->accept(this); 3708 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 3709 coord.swizzle = SWIZZLE_XXXX; 3710 switch (type->coordinate_components()) { 3711 case 4: assert(!"unexpected coord count"); 3712 /* fallthrough */ 3713 case 3: coord.swizzle |= SWIZZLE_Z << 6; 3714 /* fallthrough */ 3715 case 2: coord.swizzle |= SWIZZLE_Y << 3; 3716 } 3717 3718 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { 3719 param = param->get_next(); 3720 ((ir_dereference *)param)->accept(this); 3721 st_src_reg sample = this->result; 3722 sample.swizzle = SWIZZLE_XXXX; 3723 coord_dst.writemask = WRITEMASK_W; 3724 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample); 3725 coord.swizzle |= SWIZZLE_W << 9; 3726 } 3727 3728 param = param->get_next(); 3729 if (!param->is_tail_sentinel()) { 3730 ((ir_dereference *)param)->accept(this); 3731 arg1 = this->result; 3732 param = param->get_next(); 3733 } 3734 3735 if (!param->is_tail_sentinel()) { 3736 ((ir_dereference *)param)->accept(this); 3737 arg2 = this->result; 3738 param = param->get_next(); 3739 } 3740 3741 assert(param->is_tail_sentinel()); 3742 3743 unsigned opcode; 3744 switch (ir->callee->intrinsic_id) { 3745 case ir_intrinsic_image_load: 3746 opcode = TGSI_OPCODE_LOAD; 3747 break; 3748 case ir_intrinsic_image_store: 3749 opcode = TGSI_OPCODE_STORE; 3750 break; 3751 case ir_intrinsic_image_atomic_add: 3752 opcode = TGSI_OPCODE_ATOMUADD; 3753 break; 3754 case ir_intrinsic_image_atomic_min: 3755 opcode = TGSI_OPCODE_ATOMIMIN; 3756 break; 3757 case ir_intrinsic_image_atomic_max: 3758 opcode = TGSI_OPCODE_ATOMIMAX; 3759 break; 3760 case ir_intrinsic_image_atomic_and: 3761 opcode = TGSI_OPCODE_ATOMAND; 3762 break; 3763 case ir_intrinsic_image_atomic_or: 3764 opcode = TGSI_OPCODE_ATOMOR; 3765 break; 3766 case ir_intrinsic_image_atomic_xor: 3767 opcode = TGSI_OPCODE_ATOMXOR; 3768 break; 3769 case ir_intrinsic_image_atomic_exchange: 3770 opcode = TGSI_OPCODE_ATOMXCHG; 3771 break; 3772 case ir_intrinsic_image_atomic_comp_swap: 3773 opcode = TGSI_OPCODE_ATOMCAS; 3774 break; 3775 default: 3776 assert(!"Unexpected intrinsic"); 3777 return; 3778 } 3779 3780 inst = emit_asm(ir, opcode, dst, coord, arg1, arg2); 3781 if (opcode == TGSI_OPCODE_STORE) 3782 inst->dst[0].writemask = WRITEMASK_XYZW; 3783 } 3784 3785 if (imgvar->contains_bindless()) { 3786 inst->resource = bindless; 3787 inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, 3788 SWIZZLE_X, SWIZZLE_Y); 3789 } else { 3790 inst->resource = image; 3791 inst->sampler_array_size = sampler_array_size; 3792 inst->sampler_base = sampler_base; 3793 } 3794 3795 inst->tex_target = type->sampler_index(); 3796 inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx), 3797 _mesa_get_shader_image_format(image_format)); 3798 3799 if (memory_coherent) 3800 inst->buffer_access |= TGSI_MEMORY_COHERENT; 3801 if (memory_restrict) 3802 inst->buffer_access |= TGSI_MEMORY_RESTRICT; 3803 if (memory_volatile) 3804 inst->buffer_access |= TGSI_MEMORY_VOLATILE; 3805 } 3806 3807 void 3808 glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, unsigned op) 3809 { 3810 ir->return_deref->accept(this); 3811 st_dst_reg dst = st_dst_reg(this->result); 3812 3813 dst.writemask = u_bit_consecutive(0, ir->return_deref->var->type->vector_elements); 3814 3815 st_src_reg src[4] = { undef_src, undef_src, undef_src, undef_src }; 3816 unsigned num_src = 0; 3817 foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { 3818 assert(num_src < ARRAY_SIZE(src)); 3819 3820 this->result.file = 
PROGRAM_UNDEFINED; 3821 param->accept(this); 3822 assert(this->result.file != PROGRAM_UNDEFINED); 3823 3824 src[num_src] = this->result; 3825 num_src++; 3826 } 3827 3828 emit_asm(ir, op, dst, src[0], src[1], src[2], src[3]); 3829 } 3830 3831 void 3832 glsl_to_tgsi_visitor::visit(ir_call *ir) 3833 { 3834 ir_function_signature *sig = ir->callee; 3835 3836 /* Filter out intrinsics */ 3837 switch (sig->intrinsic_id) { 3838 case ir_intrinsic_atomic_counter_read: 3839 case ir_intrinsic_atomic_counter_increment: 3840 case ir_intrinsic_atomic_counter_predecrement: 3841 case ir_intrinsic_atomic_counter_add: 3842 case ir_intrinsic_atomic_counter_min: 3843 case ir_intrinsic_atomic_counter_max: 3844 case ir_intrinsic_atomic_counter_and: 3845 case ir_intrinsic_atomic_counter_or: 3846 case ir_intrinsic_atomic_counter_xor: 3847 case ir_intrinsic_atomic_counter_exchange: 3848 case ir_intrinsic_atomic_counter_comp_swap: 3849 visit_atomic_counter_intrinsic(ir); 3850 return; 3851 3852 case ir_intrinsic_ssbo_load: 3853 case ir_intrinsic_ssbo_store: 3854 case ir_intrinsic_ssbo_atomic_add: 3855 case ir_intrinsic_ssbo_atomic_min: 3856 case ir_intrinsic_ssbo_atomic_max: 3857 case ir_intrinsic_ssbo_atomic_and: 3858 case ir_intrinsic_ssbo_atomic_or: 3859 case ir_intrinsic_ssbo_atomic_xor: 3860 case ir_intrinsic_ssbo_atomic_exchange: 3861 case ir_intrinsic_ssbo_atomic_comp_swap: 3862 visit_ssbo_intrinsic(ir); 3863 return; 3864 3865 case ir_intrinsic_memory_barrier: 3866 case ir_intrinsic_memory_barrier_atomic_counter: 3867 case ir_intrinsic_memory_barrier_buffer: 3868 case ir_intrinsic_memory_barrier_image: 3869 case ir_intrinsic_memory_barrier_shared: 3870 case ir_intrinsic_group_memory_barrier: 3871 visit_membar_intrinsic(ir); 3872 return; 3873 3874 case ir_intrinsic_shared_load: 3875 case ir_intrinsic_shared_store: 3876 case ir_intrinsic_shared_atomic_add: 3877 case ir_intrinsic_shared_atomic_min: 3878 case ir_intrinsic_shared_atomic_max: 3879 case ir_intrinsic_shared_atomic_and: 3880 case ir_intrinsic_shared_atomic_or: 3881 case ir_intrinsic_shared_atomic_xor: 3882 case ir_intrinsic_shared_atomic_exchange: 3883 case ir_intrinsic_shared_atomic_comp_swap: 3884 visit_shared_intrinsic(ir); 3885 return; 3886 3887 case ir_intrinsic_image_load: 3888 case ir_intrinsic_image_store: 3889 case ir_intrinsic_image_atomic_add: 3890 case ir_intrinsic_image_atomic_min: 3891 case ir_intrinsic_image_atomic_max: 3892 case ir_intrinsic_image_atomic_and: 3893 case ir_intrinsic_image_atomic_or: 3894 case ir_intrinsic_image_atomic_xor: 3895 case ir_intrinsic_image_atomic_exchange: 3896 case ir_intrinsic_image_atomic_comp_swap: 3897 case ir_intrinsic_image_size: 3898 case ir_intrinsic_image_samples: 3899 visit_image_intrinsic(ir); 3900 return; 3901 3902 case ir_intrinsic_shader_clock: 3903 visit_generic_intrinsic(ir, TGSI_OPCODE_CLOCK); 3904 return; 3905 3906 case ir_intrinsic_vote_all: 3907 visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ALL); 3908 return; 3909 case ir_intrinsic_vote_any: 3910 visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ANY); 3911 return; 3912 case ir_intrinsic_vote_eq: 3913 visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_EQ); 3914 return; 3915 case ir_intrinsic_ballot: 3916 visit_generic_intrinsic(ir, TGSI_OPCODE_BALLOT); 3917 return; 3918 case ir_intrinsic_read_first_invocation: 3919 visit_generic_intrinsic(ir, TGSI_OPCODE_READ_FIRST); 3920 return; 3921 case ir_intrinsic_read_invocation: 3922 visit_generic_intrinsic(ir, TGSI_OPCODE_READ_INVOC); 3923 return; 3924 3925 case ir_intrinsic_invalid: 3926 case 
ir_intrinsic_generic_load: 3927 case ir_intrinsic_generic_store: 3928 case ir_intrinsic_generic_atomic_add: 3929 case ir_intrinsic_generic_atomic_and: 3930 case ir_intrinsic_generic_atomic_or: 3931 case ir_intrinsic_generic_atomic_xor: 3932 case ir_intrinsic_generic_atomic_min: 3933 case ir_intrinsic_generic_atomic_max: 3934 case ir_intrinsic_generic_atomic_exchange: 3935 case ir_intrinsic_generic_atomic_comp_swap: 3936 unreachable("Invalid intrinsic"); 3937 } 3938 } 3939 3940 void 3941 glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail, 3942 unsigned *array_elements, 3943 uint16_t *index, 3944 st_src_reg *indirect, 3945 unsigned *location) 3946 { 3947 switch (tail->ir_type) { 3948 case ir_type_dereference_record: { 3949 ir_dereference_record *deref_record = tail->as_dereference_record(); 3950 const glsl_type *struct_type = deref_record->record->type; 3951 int field_index = deref_record->field_idx; 3952 3953 calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location); 3954 3955 assert(field_index >= 0); 3956 *location += struct_type->record_location_offset(field_index); 3957 break; 3958 } 3959 3960 case ir_type_dereference_array: { 3961 ir_dereference_array *deref_arr = tail->as_dereference_array(); 3962 3963 void *mem_ctx = ralloc_parent(deref_arr); 3964 ir_constant *array_index = 3965 deref_arr->array_index->constant_expression_value(mem_ctx); 3966 3967 if (!array_index) { 3968 st_src_reg temp_reg; 3969 st_dst_reg temp_dst; 3970 3971 temp_reg = get_temp(glsl_type::uint_type); 3972 temp_dst = st_dst_reg(temp_reg); 3973 temp_dst.writemask = 1; 3974 3975 deref_arr->array_index->accept(this); 3976 if (*array_elements != 1) 3977 emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements)); 3978 else 3979 emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result); 3980 3981 if (indirect->file == PROGRAM_UNDEFINED) 3982 *indirect = temp_reg; 3983 else { 3984 temp_dst = st_dst_reg(*indirect); 3985 temp_dst.writemask = 1; 3986 emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg); 3987 } 3988 } else 3989 *index += array_index->value.u[0] * *array_elements; 3990 3991 *array_elements *= deref_arr->array->type->length; 3992 3993 calc_deref_offsets(deref_arr->array->as_dereference(), array_elements, index, indirect, location); 3994 break; 3995 } 3996 default: 3997 break; 3998 } 3999 } 4000 4001 void 4002 glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, 4003 unsigned *array_size, 4004 unsigned *base, 4005 uint16_t *index, 4006 st_src_reg *reladdr, 4007 bool opaque) 4008 { 4009 GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target); 4010 unsigned location = 0; 4011 ir_variable *var = ir->variable_referenced(); 4012 4013 memset(reladdr, 0, sizeof(*reladdr)); 4014 reladdr->file = PROGRAM_UNDEFINED; 4015 4016 *base = 0; 4017 *array_size = 1; 4018 4019 assert(var); 4020 location = var->data.location; 4021 calc_deref_offsets(ir, array_size, index, reladdr, &location); 4022 4023 /* 4024 * If we end up with no indirect then adjust the base to the index, 4025 * and set the array size to 1. 
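 *
 * For example (illustrative): a constant dereference s[2][1] of a
 * uniform declared as "sampler2D s[3][2]" accumulates
 * index = 1 * 1 + 2 * 2 = 5 and array_elements = 2 * 3 = 6; since no
 * indirect register was needed, the base becomes 5 and the array size
 * collapses back to 1.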
4026 */ 4027 if (reladdr->file == PROGRAM_UNDEFINED) { 4028 *base = *index; 4029 *array_size = 1; 4030 } 4031 4032 if (opaque) { 4033 assert(location != 0xffffffff); 4034 *base += this->shader_program->data->UniformStorage[location].opaque[shader].index; 4035 *index += this->shader_program->data->UniformStorage[location].opaque[shader].index; 4036 } 4037 } 4038 4039 st_src_reg 4040 glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset) 4041 { 4042 if (offset.reladdr || offset.reladdr2) { 4043 st_src_reg tmp = get_temp(glsl_type::ivec2_type); 4044 st_dst_reg tmp_dst = st_dst_reg(tmp); 4045 tmp_dst.writemask = WRITEMASK_XY; 4046 emit_asm(NULL, TGSI_OPCODE_MOV, tmp_dst, offset); 4047 return tmp; 4048 } 4049 4050 return offset; 4051 } 4052 4053 void 4054 glsl_to_tgsi_visitor::visit(ir_texture *ir) 4055 { 4056 st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy; 4057 st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component; 4058 st_src_reg levels_src, reladdr; 4059 st_dst_reg result_dst, coord_dst, cube_sc_dst; 4060 glsl_to_tgsi_instruction *inst = NULL; 4061 unsigned opcode = TGSI_OPCODE_NOP; 4062 const glsl_type *sampler_type = ir->sampler->type; 4063 unsigned sampler_array_size = 1, sampler_base = 0; 4064 bool is_cube_array = false, is_cube_shadow = false; 4065 ir_variable *var = ir->sampler->variable_referenced(); 4066 unsigned i; 4067 4068 /* if we are a cube array sampler or a cube shadow */ 4069 if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { 4070 is_cube_array = sampler_type->sampler_array; 4071 is_cube_shadow = sampler_type->sampler_shadow; 4072 } 4073 4074 if (ir->coordinate) { 4075 ir->coordinate->accept(this); 4076 4077 /* Put our coords in a temp. We'll need to modify them for shadow, 4078 * projection, or LOD, so the only case we'd use it as-is is if 4079 * we're doing plain old texturing. The optimization passes on 4080 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 4081 */ 4082 coord = get_temp(glsl_type::vec4_type); 4083 coord_dst = st_dst_reg(coord); 4084 coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1; 4085 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 4086 } 4087 4088 if (ir->projector) { 4089 ir->projector->accept(this); 4090 projector = this->result; 4091 } 4092 4093 /* Storage for our result. Ideally for an assignment we'd be using 4094 * the actual storage for the result here, instead. 4095 */ 4096 result_src = get_temp(ir->type); 4097 result_dst = st_dst_reg(result_src); 4098 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 4099 4100 switch (ir->op) { 4101 case ir_tex: 4102 opcode = (is_cube_array && ir->shadow_comparator) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; 4103 if (ir->offset) { 4104 ir->offset->accept(this); 4105 offset[0] = this->result; 4106 } 4107 break; 4108 case ir_txb: 4109 if (is_cube_array || is_cube_shadow) { 4110 opcode = TGSI_OPCODE_TXB2; 4111 } 4112 else { 4113 opcode = TGSI_OPCODE_TXB; 4114 } 4115 ir->lod_info.bias->accept(this); 4116 lod_info = this->result; 4117 if (ir->offset) { 4118 ir->offset->accept(this); 4119 offset[0] = this->result; 4120 } 4121 break; 4122 case ir_txl: 4123 if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { 4124 opcode = TGSI_OPCODE_TEX_LZ; 4125 } else { 4126 opcode = is_cube_array ? 
TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; 4127 ir->lod_info.lod->accept(this); 4128 lod_info = this->result; 4129 } 4130 if (ir->offset) { 4131 ir->offset->accept(this); 4132 offset[0] = this->result; 4133 } 4134 break; 4135 case ir_txd: 4136 opcode = TGSI_OPCODE_TXD; 4137 ir->lod_info.grad.dPdx->accept(this); 4138 dx = this->result; 4139 ir->lod_info.grad.dPdy->accept(this); 4140 dy = this->result; 4141 if (ir->offset) { 4142 ir->offset->accept(this); 4143 offset[0] = this->result; 4144 } 4145 break; 4146 case ir_txs: 4147 opcode = TGSI_OPCODE_TXQ; 4148 ir->lod_info.lod->accept(this); 4149 lod_info = this->result; 4150 break; 4151 case ir_query_levels: 4152 opcode = TGSI_OPCODE_TXQ; 4153 lod_info = undef_src; 4154 levels_src = get_temp(ir->type); 4155 break; 4156 case ir_txf: 4157 if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { 4158 opcode = TGSI_OPCODE_TXF_LZ; 4159 } else { 4160 opcode = TGSI_OPCODE_TXF; 4161 ir->lod_info.lod->accept(this); 4162 lod_info = this->result; 4163 } 4164 if (ir->offset) { 4165 ir->offset->accept(this); 4166 offset[0] = this->result; 4167 } 4168 break; 4169 case ir_txf_ms: 4170 opcode = TGSI_OPCODE_TXF; 4171 ir->lod_info.sample_index->accept(this); 4172 sample_index = this->result; 4173 break; 4174 case ir_tg4: 4175 opcode = TGSI_OPCODE_TG4; 4176 ir->lod_info.component->accept(this); 4177 component = this->result; 4178 if (ir->offset) { 4179 ir->offset->accept(this); 4180 if (ir->offset->type->is_array()) { 4181 const glsl_type *elt_type = ir->offset->type->fields.array; 4182 for (i = 0; i < ir->offset->type->length; i++) { 4183 offset[i] = this->result; 4184 offset[i].index += i * type_size(elt_type); 4185 offset[i].type = elt_type->base_type; 4186 offset[i].swizzle = swizzle_for_size(elt_type->vector_elements); 4187 offset[i] = canonicalize_gather_offset(offset[i]); 4188 } 4189 } else { 4190 offset[0] = canonicalize_gather_offset(this->result); 4191 } 4192 } 4193 break; 4194 case ir_lod: 4195 opcode = TGSI_OPCODE_LODQ; 4196 break; 4197 case ir_texture_samples: 4198 opcode = TGSI_OPCODE_TXQS; 4199 break; 4200 case ir_samples_identical: 4201 unreachable("Unexpected ir_samples_identical opcode"); 4202 } 4203 4204 if (ir->projector) { 4205 if (opcode == TGSI_OPCODE_TEX) { 4206 /* Slot the projector in as the last component of the coord. */ 4207 coord_dst.writemask = WRITEMASK_W; 4208 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector); 4209 coord_dst.writemask = WRITEMASK_XYZW; 4210 opcode = TGSI_OPCODE_TXP; 4211 } else { 4212 st_src_reg coord_w = coord; 4213 coord_w.swizzle = SWIZZLE_WWWW; 4214 4215 /* For the other TEX opcodes there's no projective version 4216 * since the last slot is taken up by LOD info. Do the 4217 * projective divide now. 4218 */ 4219 coord_dst.writemask = WRITEMASK_W; 4220 emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector); 4221 4222 /* In the case where we have to project the coordinates "by hand," 4223 * the shadow comparator value must also be projected. 4224 */ 4225 st_src_reg tmp_src = coord; 4226 if (ir->shadow_comparator) { 4227 /* Slot the shadow value in as the second to last component of the 4228 * coord. 4229 */ 4230 ir->shadow_comparator->accept(this); 4231 4232 tmp_src = get_temp(glsl_type::vec4_type); 4233 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 4234 4235 /* Projective division not allowed for array samplers. 
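 * (GLSL offers no projective lookups for array samplers, and the
 * layer index held in the coordinate must never be divided by W.)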
*/ 4236 assert(!sampler_type->sampler_array); 4237 4238 tmp_dst.writemask = WRITEMASK_Z; 4239 emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 4240 4241 tmp_dst.writemask = WRITEMASK_XY; 4242 emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 4243 } 4244 4245 coord_dst.writemask = WRITEMASK_XYZ; 4246 emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 4247 4248 coord_dst.writemask = WRITEMASK_XYZW; 4249 coord.swizzle = SWIZZLE_XYZW; 4250 } 4251 } 4252 4253 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow 4254 * comparator was put in the correct place (and projected) by the code, 4255 * above, that handles by-hand projection. 4256 */ 4257 if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 4258 /* Slot the shadow value in as the second to last component of the 4259 * coord. 4260 */ 4261 ir->shadow_comparator->accept(this); 4262 4263 if (is_cube_array) { 4264 cube_sc = get_temp(glsl_type::float_type); 4265 cube_sc_dst = st_dst_reg(cube_sc); 4266 cube_sc_dst.writemask = WRITEMASK_X; 4267 emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); 4268 cube_sc_dst.writemask = WRITEMASK_X; 4269 } 4270 else { 4271 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 4272 sampler_type->sampler_array) || 4273 sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { 4274 coord_dst.writemask = WRITEMASK_W; 4275 } else { 4276 coord_dst.writemask = WRITEMASK_Z; 4277 } 4278 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 4279 coord_dst.writemask = WRITEMASK_XYZW; 4280 } 4281 } 4282 4283 if (ir->op == ir_txf_ms) { 4284 coord_dst.writemask = WRITEMASK_W; 4285 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index); 4286 coord_dst.writemask = WRITEMASK_XYZW; 4287 } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 4288 opcode == TGSI_OPCODE_TXF) { 4289 /* TGSI stores LOD or LOD bias in the last channel of the coords. 
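 * An illustrative biased 2D sample therefore ends up as
 *    TXB TEMP[0], TEMP[1], SAMP[0], 2D
 * with the bias taken from TEMP[1].w.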
*/ 4290 coord_dst.writemask = WRITEMASK_W; 4291 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 4292 coord_dst.writemask = WRITEMASK_XYZW; 4293 } 4294 4295 st_src_reg sampler(PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT); 4296 4297 uint16_t index = 0; 4298 get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base, 4299 &index, &reladdr, !var->contains_bindless()); 4300 4301 sampler.index = index; 4302 if (reladdr.file != PROGRAM_UNDEFINED) { 4303 sampler.reladdr = ralloc(mem_ctx, st_src_reg); 4304 *sampler.reladdr = reladdr; 4305 emit_arl(ir, sampler_reladdr, reladdr); 4306 } 4307 4308 st_src_reg bindless; 4309 if (var->contains_bindless()) { 4310 ir->sampler->accept(this); 4311 bindless = this->result; 4312 } 4313 4314 if (opcode == TGSI_OPCODE_TXD) 4315 inst = emit_asm(ir, opcode, result_dst, coord, dx, dy); 4316 else if (opcode == TGSI_OPCODE_TXQ) { 4317 if (ir->op == ir_query_levels) { 4318 /* the level is stored in W */ 4319 inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info); 4320 result_dst.writemask = WRITEMASK_X; 4321 levels_src.swizzle = SWIZZLE_WWWW; 4322 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src); 4323 } else 4324 inst = emit_asm(ir, opcode, result_dst, lod_info); 4325 } else if (opcode == TGSI_OPCODE_TXQS) { 4326 inst = emit_asm(ir, opcode, result_dst); 4327 } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { 4328 inst = emit_asm(ir, opcode, result_dst, coord, lod_info); 4329 } else if (opcode == TGSI_OPCODE_TEX2) { 4330 inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); 4331 } else if (opcode == TGSI_OPCODE_TG4) { 4332 if (is_cube_array && ir->shadow_comparator) { 4333 inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); 4334 } else { 4335 inst = emit_asm(ir, opcode, result_dst, coord, component); 4336 } 4337 } else 4338 inst = emit_asm(ir, opcode, result_dst, coord); 4339 4340 if (ir->shadow_comparator) 4341 inst->tex_shadow = GL_TRUE; 4342 4343 if (var->contains_bindless()) { 4344 inst->resource = bindless; 4345 inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, 4346 SWIZZLE_X, SWIZZLE_Y); 4347 } else { 4348 inst->resource = sampler; 4349 inst->sampler_array_size = sampler_array_size; 4350 inst->sampler_base = sampler_base; 4351 } 4352 4353 if (ir->offset) { 4354 if (!inst->tex_offsets) 4355 inst->tex_offsets = rzalloc_array(inst, st_src_reg, MAX_GLSL_TEXTURE_OFFSET); 4356 4357 for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++) 4358 inst->tex_offsets[i] = offset[i]; 4359 inst->tex_offset_num_offset = i; 4360 } 4361 4362 inst->tex_target = sampler_type->sampler_index(); 4363 inst->tex_type = ir->type->base_type; 4364 4365 this->result = result_src; 4366 } 4367 4368 void 4369 glsl_to_tgsi_visitor::visit(ir_return *ir) 4370 { 4371 assert(!ir->get_value()); 4372 4373 emit_asm(ir, TGSI_OPCODE_RET); 4374 } 4375 4376 void 4377 glsl_to_tgsi_visitor::visit(ir_discard *ir) 4378 { 4379 if (ir->condition) { 4380 ir->condition->accept(this); 4381 st_src_reg condition = this->result; 4382 4383 /* Convert the bool condition to a float so we can negate. 
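 * On native-integer targets true is ~0, so ANDing with 1.0f yields
 * 1.0f or 0.0f; after negation, KILL_IF (which kills when a channel
 * is negative) fires exactly when the condition is true.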
*/ 4384 if (native_integers) { 4385 st_src_reg temp = get_temp(ir->condition->type); 4386 emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp), 4387 condition, st_src_reg_for_float(1.0)); 4388 condition = temp; 4389 } 4390 4391 condition.negate = ~condition.negate; 4392 emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition); 4393 } else { 4394 /* unconditional kil */ 4395 emit_asm(ir, TGSI_OPCODE_KILL); 4396 } 4397 } 4398 4399 void 4400 glsl_to_tgsi_visitor::visit(ir_if *ir) 4401 { 4402 unsigned if_opcode; 4403 glsl_to_tgsi_instruction *if_inst; 4404 4405 ir->condition->accept(this); 4406 assert(this->result.file != PROGRAM_UNDEFINED); 4407 4408 if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF; 4409 4410 if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result); 4411 4412 this->instructions.push_tail(if_inst); 4413 4414 visit_exec_list(&ir->then_instructions, this); 4415 4416 if (!ir->else_instructions.is_empty()) { 4417 emit_asm(ir->condition, TGSI_OPCODE_ELSE); 4418 visit_exec_list(&ir->else_instructions, this); 4419 } 4420 4421 if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF); 4422 } 4423 4424 4425 void 4426 glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir) 4427 { 4428 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); 4429 4430 ir->stream->accept(this); 4431 emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result); 4432 } 4433 4434 void 4435 glsl_to_tgsi_visitor::visit(ir_end_primitive *ir) 4436 { 4437 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); 4438 4439 ir->stream->accept(this); 4440 emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result); 4441 } 4442 4443 void 4444 glsl_to_tgsi_visitor::visit(ir_barrier *ir) 4445 { 4446 assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV || 4447 this->prog->Target == GL_COMPUTE_PROGRAM_NV); 4448 4449 emit_asm(ir, TGSI_OPCODE_BARRIER); 4450 } 4451 4452 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 4453 { 4454 STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS); 4455 4456 result.file = PROGRAM_UNDEFINED; 4457 next_temp = 1; 4458 array_sizes = NULL; 4459 max_num_arrays = 0; 4460 next_array = 0; 4461 num_inputs = 0; 4462 num_outputs = 0; 4463 num_input_arrays = 0; 4464 num_output_arrays = 0; 4465 num_atomics = 0; 4466 num_atomic_arrays = 0; 4467 num_immediates = 0; 4468 num_address_regs = 0; 4469 samplers_used = 0; 4470 images_used = 0; 4471 indirect_addr_consts = false; 4472 wpos_transform_const = -1; 4473 native_integers = false; 4474 mem_ctx = ralloc_context(NULL); 4475 ctx = NULL; 4476 prog = NULL; 4477 precise = 0; 4478 shader_program = NULL; 4479 shader = NULL; 4480 options = NULL; 4481 have_sqrt = false; 4482 have_fma = false; 4483 use_shared_memory = false; 4484 has_tex_txf_lz = false; 4485 variables = NULL; 4486 } 4487 4488 static void var_destroy(struct hash_entry *entry) 4489 { 4490 variable_storage *storage = (variable_storage *)entry->data; 4491 4492 delete storage; 4493 } 4494 4495 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 4496 { 4497 _mesa_hash_table_destroy(variables, var_destroy); 4498 free(array_sizes); 4499 ralloc_free(mem_ctx); 4500 } 4501 4502 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 4503 { 4504 delete v; 4505 } 4506 4507 4508 /** 4509 * Count resources used by the given gpu program (number of texture 4510 * samplers, etc). 
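 * Also records each sampler's return type and texture target, the
 * format of each image, whether shared memory is used, and which
 * samplers are accessed with TXF.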
4511 */ 4512 static void 4513 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 4514 { 4515 v->samplers_used = 0; 4516 v->images_used = 0; 4517 prog->info.textures_used_by_txf = 0; 4518 4519 foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { 4520 if (inst->info->is_tex) { 4521 for (int i = 0; i < inst->sampler_array_size; i++) { 4522 unsigned idx = inst->sampler_base + i; 4523 v->samplers_used |= 1u << idx; 4524 4525 debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types)); 4526 v->sampler_types[idx] = inst->tex_type; 4527 v->sampler_targets[idx] = 4528 st_translate_texture_target(inst->tex_target, inst->tex_shadow); 4529 4530 if (inst->op == TGSI_OPCODE_TXF || inst->op == TGSI_OPCODE_TXF_LZ) { 4531 prog->info.textures_used_by_txf |= 1u << idx; 4532 } 4533 } 4534 } 4535 4536 if (inst->tex_target == TEXTURE_EXTERNAL_INDEX) 4537 prog->ExternalSamplersUsed |= 1 << inst->resource.index; 4538 4539 if (inst->resource.file != PROGRAM_UNDEFINED && ( 4540 is_resource_instruction(inst->op) || 4541 inst->op == TGSI_OPCODE_STORE)) { 4542 if (inst->resource.file == PROGRAM_MEMORY) { 4543 v->use_shared_memory = true; 4544 } else if (inst->resource.file == PROGRAM_IMAGE) { 4545 for (int i = 0; i < inst->sampler_array_size; i++) { 4546 unsigned idx = inst->sampler_base + i; 4547 v->images_used |= 1 << idx; 4548 v->image_targets[idx] = 4549 st_translate_texture_target(inst->tex_target, false); 4550 v->image_formats[idx] = inst->image_format; 4551 } 4552 } 4553 } 4554 } 4555 prog->SamplersUsed = v->samplers_used; 4556 4557 if (v->shader_program != NULL) 4558 _mesa_update_shader_textures_used(v->shader_program, prog); 4559 } 4560 4561 /** 4562 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 4563 * are read from the given src in this instruction 4564 */ 4565 static int 4566 get_src_arg_mask(st_dst_reg dst, st_src_reg src) 4567 { 4568 int read_mask = 0, comp; 4569 4570 /* Now, given the src swizzle and the written channels, find which 4571 * components are actually read 4572 */ 4573 for (comp = 0; comp < 4; ++comp) { 4574 const unsigned coord = GET_SWZ(src.swizzle, comp); 4575 assert(coord < 4); 4576 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 4577 read_mask |= 1 << coord; 4578 } 4579 4580 return read_mask; 4581 } 4582 4583 /** 4584 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 4585 * instruction is the first instruction to write to register T0. There are 4586 * several lowering passes done in GLSL IR (e.g. branches and 4587 * relative addressing) that create a large number of conditional assignments 4588 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 4589 * 4590 * Here is why this conversion is safe: 4591 * CMP T0, T1 T2 T0 can be expanded to: 4592 * if (T1 < 0.0) 4593 * MOV T0, T2; 4594 * else 4595 * MOV T0, T0; 4596 * 4597 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 4598 * as the original program. If (T1 < 0.0) evaluates to false, executing 4599 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 4600 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 4601 * because any instruction that was going to read from T0 after this was going 4602 * to read a garbage value anyway. 
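 *
 * get_src_arg_mask() below is used to verify that the CMP reads
 * exactly the same channels of T0 that it writes before the
 * substitution is made.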
4603 */ 4604 void 4605 glsl_to_tgsi_visitor::simplify_cmp(void) 4606 { 4607 int tempWritesSize = 0; 4608 unsigned *tempWrites = NULL; 4609 unsigned outputWrites[VARYING_SLOT_TESS_MAX]; 4610 4611 memset(outputWrites, 0, sizeof(outputWrites)); 4612 4613 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4614 unsigned prevWriteMask = 0; 4615 4616 /* Give up if we encounter relative addressing or flow control. */ 4617 if (inst->dst[0].reladdr || inst->dst[0].reladdr2 || 4618 inst->dst[1].reladdr || inst->dst[1].reladdr2 || 4619 inst->info->is_branch || 4620 inst->op == TGSI_OPCODE_CONT || 4621 inst->op == TGSI_OPCODE_END || 4622 inst->op == TGSI_OPCODE_RET) { 4623 break; 4624 } 4625 4626 if (inst->dst[0].file == PROGRAM_OUTPUT) { 4627 assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites)); 4628 prevWriteMask = outputWrites[inst->dst[0].index]; 4629 outputWrites[inst->dst[0].index] |= inst->dst[0].writemask; 4630 } else if (inst->dst[0].file == PROGRAM_TEMPORARY) { 4631 if (inst->dst[0].index >= tempWritesSize) { 4632 const int inc = 4096; 4633 4634 tempWrites = (unsigned*) 4635 realloc(tempWrites, 4636 (tempWritesSize + inc) * sizeof(unsigned)); 4637 if (!tempWrites) 4638 return; 4639 4640 memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned)); 4641 tempWritesSize += inc; 4642 } 4643 4644 prevWriteMask = tempWrites[inst->dst[0].index]; 4645 tempWrites[inst->dst[0].index] |= inst->dst[0].writemask; 4646 } else 4647 continue; 4648 4649 /* For a CMP to be considered a conditional write, the destination 4650 * register and source register two must be the same. */ 4651 if (inst->op == TGSI_OPCODE_CMP 4652 && !(inst->dst[0].writemask & prevWriteMask) 4653 && inst->src[2].file == inst->dst[0].file 4654 && inst->src[2].index == inst->dst[0].index 4655 && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) { 4656 4657 inst->op = TGSI_OPCODE_MOV; 4658 inst->info = tgsi_get_opcode_info(inst->op); 4659 inst->src[0] = inst->src[1]; 4660 } 4661 } 4662 4663 free(tempWrites); 4664 } 4665 4666 static void 4667 rename_temp_handle_src(struct rename_reg_pair *renames, st_src_reg *src) 4668 { 4669 if (src && src->file == PROGRAM_TEMPORARY) { 4670 int old_idx = src->index; 4671 if (renames[old_idx].valid) 4672 src->index = renames[old_idx].new_reg; 4673 } 4674 } 4675 4676 /* Replaces all references to a temporary register index with another index. 
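 * The renames table is indexed by the old register index; entries with
 * 'valid' set map it to 'new_reg'. Sources, destinations, reladdr,
 * resource and texture-offset references are all patched.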
*/ 4677 void 4678 glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames) 4679 { 4680 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4681 unsigned j; 4682 for (j = 0; j < num_inst_src_regs(inst); j++) { 4683 rename_temp_handle_src(renames, &inst->src[j]); 4684 rename_temp_handle_src(renames, inst->src[j].reladdr); 4685 rename_temp_handle_src(renames, inst->src[j].reladdr2); 4686 } 4687 4688 for (j = 0; j < inst->tex_offset_num_offset; j++) { 4689 rename_temp_handle_src(renames, &inst->tex_offsets[j]); 4690 rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr); 4691 rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr2); 4692 } 4693 4694 rename_temp_handle_src(renames, &inst->resource); 4695 rename_temp_handle_src(renames, inst->resource.reladdr); 4696 rename_temp_handle_src(renames, inst->resource.reladdr2); 4697 4698 for (j = 0; j < num_inst_dst_regs(inst); j++) { 4699 if (inst->dst[j].file == PROGRAM_TEMPORARY) { 4700 int old_idx = inst->dst[j].index; 4701 if (renames[old_idx].valid) 4702 inst->dst[j].index = renames[old_idx].new_reg; 4703 } 4704 rename_temp_handle_src(renames, inst->dst[j].reladdr); 4705 rename_temp_handle_src(renames, inst->dst[j].reladdr2); 4706 } 4707 } 4708 } 4709 4710 void 4711 glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes) 4712 { 4713 int depth = 0; /* loop depth */ 4714 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 4715 unsigned i = 0, j; 4716 4717 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4718 for (j = 0; j < num_inst_dst_regs(inst); j++) { 4719 if (inst->dst[j].file == PROGRAM_TEMPORARY) { 4720 if (first_writes[inst->dst[j].index] == -1) 4721 first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; 4722 } 4723 } 4724 4725 if (inst->op == TGSI_OPCODE_BGNLOOP) { 4726 if(depth++ == 0) 4727 loop_start = i; 4728 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 4729 if (--depth == 0) 4730 loop_start = -1; 4731 } 4732 assert(depth >= 0); 4733 i++; 4734 } 4735 } 4736 4737 void 4738 glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) 4739 { 4740 int depth = 0; /* loop depth */ 4741 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 4742 unsigned i = 0, j; 4743 4744 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4745 for (j = 0; j < num_inst_src_regs(inst); j++) { 4746 if (inst->src[j].file == PROGRAM_TEMPORARY) { 4747 if (first_reads[inst->src[j].index] == -1) 4748 first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start; 4749 } 4750 } 4751 for (j = 0; j < inst->tex_offset_num_offset; j++) { 4752 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { 4753 if (first_reads[inst->tex_offsets[j].index] == -1) 4754 first_reads[inst->tex_offsets[j].index] = (depth == 0) ? 
i : loop_start; 4755 } 4756 } 4757 if (inst->op == TGSI_OPCODE_BGNLOOP) { 4758 if(depth++ == 0) 4759 loop_start = i; 4760 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 4761 if (--depth == 0) 4762 loop_start = -1; 4763 } 4764 assert(depth >= 0); 4765 i++; 4766 } 4767 } 4768 4769 void 4770 glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes) 4771 { 4772 int depth = 0; /* loop depth */ 4773 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 4774 unsigned i = 0, j; 4775 int k; 4776 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4777 for (j = 0; j < num_inst_src_regs(inst); j++) { 4778 if (inst->src[j].file == PROGRAM_TEMPORARY) 4779 last_reads[inst->src[j].index] = (depth == 0) ? i : -2; 4780 } 4781 for (j = 0; j < num_inst_dst_regs(inst); j++) { 4782 if (inst->dst[j].file == PROGRAM_TEMPORARY) { 4783 if (first_writes[inst->dst[j].index] == -1) 4784 first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; 4785 last_reads[inst->dst[j].index] = (depth == 0) ? i : -2; 4786 } 4787 } 4788 for (j = 0; j < inst->tex_offset_num_offset; j++) { 4789 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) 4790 last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2; 4791 } 4792 if (inst->op == TGSI_OPCODE_BGNLOOP) { 4793 if(depth++ == 0) 4794 loop_start = i; 4795 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 4796 if (--depth == 0) { 4797 loop_start = -1; 4798 for (k = 0; k < this->next_temp; k++) { 4799 if (last_reads[k] == -2) { 4800 last_reads[k] = i; 4801 } 4802 } 4803 } 4804 } 4805 assert(depth >= 0); 4806 i++; 4807 } 4808 } 4809 4810 void 4811 glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes) 4812 { 4813 int depth = 0; /* loop depth */ 4814 int i = 0, k; 4815 unsigned j; 4816 4817 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4818 for (j = 0; j < num_inst_dst_regs(inst); j++) { 4819 if (inst->dst[j].file == PROGRAM_TEMPORARY) 4820 last_writes[inst->dst[j].index] = (depth == 0) ? i : -2; 4821 } 4822 4823 if (inst->op == TGSI_OPCODE_BGNLOOP) 4824 depth++; 4825 else if (inst->op == TGSI_OPCODE_ENDLOOP) 4826 if (--depth == 0) { 4827 for (k = 0; k < this->next_temp; k++) { 4828 if (last_writes[k] == -2) { 4829 last_writes[k] = i; 4830 } 4831 } 4832 } 4833 assert(depth >= 0); 4834 i++; 4835 } 4836 } 4837 4838 /* 4839 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 4840 * channels for copy propagation and updates following instructions to 4841 * use the original versions. 4842 * 4843 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 4844 * will occur. As an example, a TXP production before this pass: 4845 * 4846 * 0: MOV TEMP[1], INPUT[4].xyyy; 4847 * 1: MOV TEMP[1].w, INPUT[4].wwww; 4848 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 4849 * 4850 * and after: 4851 * 4852 * 0: MOV TEMP[1], INPUT[4].xyyy; 4853 * 1: MOV TEMP[1].w, INPUT[4].wwww; 4854 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 4855 * 4856 * which allows for dead code elimination on TEMP[1]'s writes. 
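 *
 * The ACP (available copies) array below holds one entry per temporary
 * channel, indexed as 4 * reg + channel; acp_level records the
 * control-flow nesting depth at which each entry was added, so copies
 * made inside an if/else block can be invalidated when the block
 * closes.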
4857 */ 4858 void 4859 glsl_to_tgsi_visitor::copy_propagate(void) 4860 { 4861 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 4862 glsl_to_tgsi_instruction *, 4863 this->next_temp * 4); 4864 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 4865 int level = 0; 4866 4867 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4868 assert(inst->dst[0].file != PROGRAM_TEMPORARY 4869 || inst->dst[0].index < this->next_temp); 4870 4871 /* First, do any copy propagation possible into the src regs. */ 4872 for (int r = 0; r < 3; r++) { 4873 glsl_to_tgsi_instruction *first = NULL; 4874 bool good = true; 4875 int acp_base = inst->src[r].index * 4; 4876 4877 if (inst->src[r].file != PROGRAM_TEMPORARY || 4878 inst->src[r].reladdr || 4879 inst->src[r].reladdr2) 4880 continue; 4881 4882 /* See if we can find entries in the ACP consisting of MOVs 4883 * from the same src register for all the swizzled channels 4884 * of this src register reference. 4885 */ 4886 for (int i = 0; i < 4; i++) { 4887 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 4888 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 4889 4890 if (!copy_chan) { 4891 good = false; 4892 break; 4893 } 4894 4895 assert(acp_level[acp_base + src_chan] <= level); 4896 4897 if (!first) { 4898 first = copy_chan; 4899 } else { 4900 if (first->src[0].file != copy_chan->src[0].file || 4901 first->src[0].index != copy_chan->src[0].index || 4902 first->src[0].double_reg2 != copy_chan->src[0].double_reg2 || 4903 first->src[0].index2D != copy_chan->src[0].index2D) { 4904 good = false; 4905 break; 4906 } 4907 } 4908 } 4909 4910 if (good) { 4911 /* We've now validated that we can copy-propagate to 4912 * replace this src register reference. Do it. 4913 */ 4914 inst->src[r].file = first->src[0].file; 4915 inst->src[r].index = first->src[0].index; 4916 inst->src[r].index2D = first->src[0].index2D; 4917 inst->src[r].has_index2 = first->src[0].has_index2; 4918 inst->src[r].double_reg2 = first->src[0].double_reg2; 4919 inst->src[r].array_id = first->src[0].array_id; 4920 4921 int swizzle = 0; 4922 for (int i = 0; i < 4; i++) { 4923 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 4924 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 4925 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i)); 4926 } 4927 inst->src[r].swizzle = swizzle; 4928 } 4929 } 4930 4931 switch (inst->op) { 4932 case TGSI_OPCODE_BGNLOOP: 4933 case TGSI_OPCODE_ENDLOOP: 4934 /* End of a basic block, clear the ACP entirely. */ 4935 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 4936 break; 4937 4938 case TGSI_OPCODE_IF: 4939 case TGSI_OPCODE_UIF: 4940 ++level; 4941 break; 4942 4943 case TGSI_OPCODE_ENDIF: 4944 case TGSI_OPCODE_ELSE: 4945 /* Clear all channels written inside the block from the ACP, but 4946 * leaving those that were not touched. 4947 */ 4948 for (int r = 0; r < this->next_temp; r++) { 4949 for (int c = 0; c < 4; c++) { 4950 if (!acp[4 * r + c]) 4951 continue; 4952 4953 if (acp_level[4 * r + c] >= level) 4954 acp[4 * r + c] = NULL; 4955 } 4956 } 4957 if (inst->op == TGSI_OPCODE_ENDIF) 4958 --level; 4959 break; 4960 4961 default: 4962 /* Continuing the block, clear any written channels from 4963 * the ACP. 4964 */ 4965 for (int d = 0; d < 2; d++) { 4966 if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) { 4967 /* Any temporary might be written, so no copy propagation 4968 * across this instruction. 
4969 */ 4970 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 4971 } else if (inst->dst[d].file == PROGRAM_OUTPUT && 4972 inst->dst[d].reladdr) { 4973 /* Any output might be written, so no copy propagation 4974 * from outputs across this instruction. 4975 */ 4976 for (int r = 0; r < this->next_temp; r++) { 4977 for (int c = 0; c < 4; c++) { 4978 if (!acp[4 * r + c]) 4979 continue; 4980 4981 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 4982 acp[4 * r + c] = NULL; 4983 } 4984 } 4985 } else if (inst->dst[d].file == PROGRAM_TEMPORARY || 4986 inst->dst[d].file == PROGRAM_OUTPUT) { 4987 /* Clear where it's used as dst. */ 4988 if (inst->dst[d].file == PROGRAM_TEMPORARY) { 4989 for (int c = 0; c < 4; c++) { 4990 if (inst->dst[d].writemask & (1 << c)) 4991 acp[4 * inst->dst[d].index + c] = NULL; 4992 } 4993 } 4994 4995 /* Clear where it's used as src. */ 4996 for (int r = 0; r < this->next_temp; r++) { 4997 for (int c = 0; c < 4; c++) { 4998 if (!acp[4 * r + c]) 4999 continue; 5000 5001 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 5002 5003 if (acp[4 * r + c]->src[0].file == inst->dst[d].file && 5004 acp[4 * r + c]->src[0].index == inst->dst[d].index && 5005 inst->dst[d].writemask & (1 << src_chan)) { 5006 acp[4 * r + c] = NULL; 5007 } 5008 } 5009 } 5010 } 5011 } 5012 break; 5013 } 5014 5015 /* If this is a copy, add it to the ACP. */ 5016 if (inst->op == TGSI_OPCODE_MOV && 5017 inst->dst[0].file == PROGRAM_TEMPORARY && 5018 !(inst->dst[0].file == inst->src[0].file && 5019 inst->dst[0].index == inst->src[0].index) && 5020 !inst->dst[0].reladdr && 5021 !inst->dst[0].reladdr2 && 5022 !inst->saturate && 5023 inst->src[0].file != PROGRAM_ARRAY && 5024 (inst->src[0].file != PROGRAM_OUTPUT || 5025 this->shader->Stage != MESA_SHADER_TESS_CTRL) && 5026 !inst->src[0].reladdr && 5027 !inst->src[0].reladdr2 && 5028 !inst->src[0].negate && 5029 !inst->src[0].abs) { 5030 for (int i = 0; i < 4; i++) { 5031 if (inst->dst[0].writemask & (1 << i)) { 5032 acp[4 * inst->dst[0].index + i] = inst; 5033 acp_level[4 * inst->dst[0].index + i] = level; 5034 } 5035 } 5036 } 5037 } 5038 5039 ralloc_free(acp_level); 5040 ralloc_free(acp); 5041 } 5042 5043 static void 5044 dead_code_handle_reladdr(glsl_to_tgsi_instruction **writes, st_src_reg *reladdr) 5045 { 5046 if (reladdr && reladdr->file == PROGRAM_TEMPORARY) { 5047 /* Clear where it's used as src. */ 5048 int swz = GET_SWZ(reladdr->swizzle, 0); 5049 writes[4 * reladdr->index + swz] = NULL; 5050 } 5051 } 5052 5053 /* 5054 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 5055 * code elimination. 5056 * 5057 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 5058 * will occur. 
As an example, a TXP production after copy propagation but 5059 * before this pass: 5060 * 5061 * 0: MOV TEMP[1], INPUT[4].xyyy; 5062 * 1: MOV TEMP[1].w, INPUT[4].wwww; 5063 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 5064 * 5065 * and after this pass: 5066 * 5067 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 5068 */ 5069 int 5070 glsl_to_tgsi_visitor::eliminate_dead_code(void) 5071 { 5072 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 5073 glsl_to_tgsi_instruction *, 5074 this->next_temp * 4); 5075 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 5076 int level = 0; 5077 int removed = 0; 5078 5079 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5080 assert(inst->dst[0].file != PROGRAM_TEMPORARY 5081 || inst->dst[0].index < this->next_temp); 5082 5083 switch (inst->op) { 5084 case TGSI_OPCODE_BGNLOOP: 5085 case TGSI_OPCODE_ENDLOOP: 5086 case TGSI_OPCODE_CONT: 5087 case TGSI_OPCODE_BRK: 5088 /* End of a basic block, clear the write array entirely. 5089 * 5090 * This keeps us from killing dead code when the writes are 5091 * on either side of a loop, even when the register isn't touched 5092 * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit 5093 * dead code of this type, so it shouldn't make a difference as long as 5094 * the dead code elimination pass in the GLSL compiler does its job. 5095 */ 5096 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 5097 break; 5098 5099 case TGSI_OPCODE_ENDIF: 5100 case TGSI_OPCODE_ELSE: 5101 /* Promote the recorded level of all channels written inside the 5102 * preceding if or else block to the level above the if/else block. 5103 */ 5104 for (int r = 0; r < this->next_temp; r++) { 5105 for (int c = 0; c < 4; c++) { 5106 if (!writes[4 * r + c]) 5107 continue; 5108 5109 if (write_level[4 * r + c] == level) 5110 write_level[4 * r + c] = level-1; 5111 } 5112 } 5113 if(inst->op == TGSI_OPCODE_ENDIF) 5114 --level; 5115 break; 5116 5117 case TGSI_OPCODE_IF: 5118 case TGSI_OPCODE_UIF: 5119 ++level; 5120 /* fallthrough to default case to mark the condition as read */ 5121 default: 5122 /* Continuing the block, clear any channels from the write array that 5123 * are read by this instruction. 5124 */ 5125 for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) { 5126 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 5127 /* Any temporary might be read, so no dead code elimination 5128 * across this instruction. 5129 */ 5130 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 5131 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 5132 /* Clear where it's used as src. */ 5133 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 5134 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 5135 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 5136 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 5137 5138 for (int c = 0; c < 4; c++) { 5139 if (src_chans & (1 << c)) 5140 writes[4 * inst->src[i].index + c] = NULL; 5141 } 5142 } 5143 dead_code_handle_reladdr(writes, inst->src[i].reladdr); 5144 dead_code_handle_reladdr(writes, inst->src[i].reladdr2); 5145 } 5146 for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) { 5147 if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){ 5148 /* Any temporary might be read, so no dead code elimination 5149 * across this instruction. 
5150 */ 5151 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 5152 } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) { 5153 /* Clear where it's used as src. */ 5154 int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0); 5155 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1); 5156 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2); 5157 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3); 5158 5159 for (int c = 0; c < 4; c++) { 5160 if (src_chans & (1 << c)) 5161 writes[4 * inst->tex_offsets[i].index + c] = NULL; 5162 } 5163 } 5164 dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr); 5165 dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr2); 5166 } 5167 5168 if (inst->resource.file == PROGRAM_TEMPORARY) { 5169 int src_chans; 5170 5171 src_chans = 1 << GET_SWZ(inst->resource.swizzle, 0); 5172 src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 1); 5173 src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 2); 5174 src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 3); 5175 5176 for (int c = 0; c < 4; c++) { 5177 if (src_chans & (1 << c)) 5178 writes[4 * inst->resource.index + c] = NULL; 5179 } 5180 } 5181 dead_code_handle_reladdr(writes, inst->resource.reladdr); 5182 dead_code_handle_reladdr(writes, inst->resource.reladdr2); 5183 5184 for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) { 5185 dead_code_handle_reladdr(writes, inst->dst[i].reladdr); 5186 dead_code_handle_reladdr(writes, inst->dst[i].reladdr2); 5187 } 5188 break; 5189 } 5190 5191 /* If this instruction writes to a temporary, add it to the write array. 5192 * If there is already an instruction in the write array for one or more 5193 * of the channels, flag that channel write as dead. 5194 */ 5195 for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) { 5196 if (inst->dst[i].file == PROGRAM_TEMPORARY && 5197 !inst->dst[i].reladdr) { 5198 for (int c = 0; c < 4; c++) { 5199 if (inst->dst[i].writemask & (1 << c)) { 5200 if (writes[4 * inst->dst[i].index + c]) { 5201 if (write_level[4 * inst->dst[i].index + c] < level) 5202 continue; 5203 else 5204 writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c); 5205 } 5206 writes[4 * inst->dst[i].index + c] = inst; 5207 write_level[4 * inst->dst[i].index + c] = level; 5208 } 5209 } 5210 } 5211 } 5212 } 5213 5214 /* Anything still in the write array at this point is dead code. */ 5215 for (int r = 0; r < this->next_temp; r++) { 5216 for (int c = 0; c < 4; c++) { 5217 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 5218 if (inst) 5219 inst->dead_mask |= (1 << c); 5220 } 5221 } 5222 5223 /* Now actually remove the instructions that are completely dead and update 5224 * the writemask of other instructions with dead channels. 5225 */ 5226 foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { 5227 if (!inst->dead_mask || !inst->dst[0].writemask) 5228 continue; 5229 /* No amount of dead masks should remove memory stores */ 5230 if (inst->info->is_store) 5231 continue; 5232 5233 if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { 5234 inst->remove(); 5235 delete inst; 5236 removed++; 5237 } else { 5238 if (glsl_base_type_is_64bit(inst->dst[0].type)) { 5239 if (inst->dead_mask == WRITEMASK_XY || 5240 inst->dead_mask == WRITEMASK_ZW) 5241 inst->dst[0].writemask &= ~(inst->dead_mask); 5242 } else 5243 inst->dst[0].writemask &= ~(inst->dead_mask); 5244 } 5245 } 5246 5247 ralloc_free(write_level); 5248 ralloc_free(writes); 5249 5250 return removed; 5251 } 5252 5253 /* merge DFRACEXP instructions into one. 
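 * DFRACEXP has two destinations (fraction and exponent), but the IR
 * emits one instruction per destination actually used; we look for a
 * later matching instruction that defines the other dst and fuse the
 * pair, substituting a dummy register when no partner exists.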
*/ 5254 void 5255 glsl_to_tgsi_visitor::merge_two_dsts(void) 5256 { 5257 /* We never delete inst, but we may delete its successor. */ 5258 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5259 glsl_to_tgsi_instruction *inst2; 5260 unsigned defined; 5261 5262 if (num_inst_dst_regs(inst) != 2) 5263 continue; 5264 5265 if (inst->dst[0].file != PROGRAM_UNDEFINED && 5266 inst->dst[1].file != PROGRAM_UNDEFINED) 5267 continue; 5268 5269 assert(inst->dst[0].file != PROGRAM_UNDEFINED || 5270 inst->dst[1].file != PROGRAM_UNDEFINED); 5271 5272 if (inst->dst[0].file == PROGRAM_UNDEFINED) 5273 defined = 1; 5274 else 5275 defined = 0; 5276 5277 inst2 = (glsl_to_tgsi_instruction *) inst->next; 5278 while (!inst2->is_tail_sentinel()) { 5279 if (inst->op == inst2->op && 5280 inst2->dst[defined].file == PROGRAM_UNDEFINED && 5281 inst->src[0].file == inst2->src[0].file && 5282 inst->src[0].index == inst2->src[0].index && 5283 inst->src[0].type == inst2->src[0].type && 5284 inst->src[0].swizzle == inst2->src[0].swizzle) 5285 break; 5286 inst2 = (glsl_to_tgsi_instruction *) inst2->next; 5287 } 5288 5289 if (inst2->is_tail_sentinel()) { 5290 /* Undefined destinations are not allowed, substitute with an unused 5291 * temporary register. 5292 */ 5293 st_src_reg tmp = get_temp(glsl_type::vec4_type); 5294 inst->dst[defined ^ 1] = st_dst_reg(tmp); 5295 inst->dst[defined ^ 1].writemask = 0; 5296 continue; 5297 } 5298 5299 inst->dst[defined ^ 1] = inst2->dst[defined ^ 1]; 5300 inst2->remove(); 5301 delete inst2; 5302 } 5303 } 5304 5305 /* Merges temporary registers together where possible to reduce the number of 5306 * registers needed to run a program. 5307 * 5308 * Produces optimal code only after copy propagation and dead code elimination 5309 * have been run. */ 5310 void 5311 glsl_to_tgsi_visitor::merge_registers(void) 5312 { 5313 assert(need_uarl); 5314 struct lifetime *lifetimes = 5315 rzalloc_array(mem_ctx, struct lifetime, this->next_temp); 5316 5317 if (get_temp_registers_required_lifetimes(mem_ctx, &this->instructions, 5318 this->next_temp, lifetimes)) { 5319 struct rename_reg_pair *renames = 5320 rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); 5321 get_temp_registers_remapping(mem_ctx, this->next_temp, lifetimes, renames); 5322 rename_temp_registers(renames); 5323 ralloc_free(renames); 5324 } 5325 5326 ralloc_free(lifetimes); 5327 } 5328 5329 /* Reassign indices to temporary registers by reusing unused indices created 5330 * by optimization passes. */ 5331 void 5332 glsl_to_tgsi_visitor::renumber_registers(void) 5333 { 5334 int i = 0; 5335 int new_index = 0; 5336 int *first_writes = ralloc_array(mem_ctx, int, this->next_temp); 5337 struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); 5338 5339 for (i = 0; i < this->next_temp; i++) { 5340 first_writes[i] = -1; 5341 } 5342 get_first_temp_write(first_writes); 5343 5344 for (i = 0; i < this->next_temp; i++) { 5345 if (first_writes[i] < 0) continue; 5346 if (i != new_index) { 5347 renames[i].new_reg = new_index; 5348 renames[i].valid = true; 5349 } 5350 new_index++; 5351 } 5352 5353 rename_temp_registers(renames); 5354 this->next_temp = new_index; 5355 ralloc_free(renames); 5356 ralloc_free(first_writes); 5357 } 5358 5359 /* ------------------------- TGSI conversion stuff -------------------------- */ 5360 5361 /** 5362 * Intermediate state used during shader translation. 
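 * Holds the ureg handles that the visitor's register files are mapped
 * onto while the instruction list is emitted as TGSI.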
5363 */ 5364 struct st_translate { 5365 struct ureg_program *ureg; 5366 5367 unsigned temps_size; 5368 struct ureg_dst *temps; 5369 5370 struct ureg_dst *arrays; 5371 unsigned num_temp_arrays; 5372 struct ureg_src *constants; 5373 int num_constants; 5374 struct ureg_src *immediates; 5375 int num_immediates; 5376 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 5377 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 5378 struct ureg_dst address[3]; 5379 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 5380 struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; 5381 struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; 5382 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 5383 struct ureg_src hw_atomics[PIPE_MAX_HW_ATOMIC_BUFFERS]; 5384 struct ureg_src shared_memory; 5385 unsigned *array_sizes; 5386 struct inout_decl *input_decls; 5387 unsigned num_input_decls; 5388 struct inout_decl *output_decls; 5389 unsigned num_output_decls; 5390 5391 const ubyte *inputMapping; 5392 const ubyte *outputMapping; 5393 5394 unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ 5395 bool need_uarl; 5396 }; 5397 5398 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */ 5399 unsigned 5400 _mesa_sysval_to_semantic(unsigned sysval) 5401 { 5402 switch (sysval) { 5403 /* Vertex shader */ 5404 case SYSTEM_VALUE_VERTEX_ID: 5405 return TGSI_SEMANTIC_VERTEXID; 5406 case SYSTEM_VALUE_INSTANCE_ID: 5407 return TGSI_SEMANTIC_INSTANCEID; 5408 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: 5409 return TGSI_SEMANTIC_VERTEXID_NOBASE; 5410 case SYSTEM_VALUE_BASE_VERTEX: 5411 return TGSI_SEMANTIC_BASEVERTEX; 5412 case SYSTEM_VALUE_BASE_INSTANCE: 5413 return TGSI_SEMANTIC_BASEINSTANCE; 5414 case SYSTEM_VALUE_DRAW_ID: 5415 return TGSI_SEMANTIC_DRAWID; 5416 5417 /* Geometry shader */ 5418 case SYSTEM_VALUE_INVOCATION_ID: 5419 return TGSI_SEMANTIC_INVOCATIONID; 5420 5421 /* Fragment shader */ 5422 case SYSTEM_VALUE_FRAG_COORD: 5423 return TGSI_SEMANTIC_POSITION; 5424 case SYSTEM_VALUE_FRONT_FACE: 5425 return TGSI_SEMANTIC_FACE; 5426 case SYSTEM_VALUE_SAMPLE_ID: 5427 return TGSI_SEMANTIC_SAMPLEID; 5428 case SYSTEM_VALUE_SAMPLE_POS: 5429 return TGSI_SEMANTIC_SAMPLEPOS; 5430 case SYSTEM_VALUE_SAMPLE_MASK_IN: 5431 return TGSI_SEMANTIC_SAMPLEMASK; 5432 case SYSTEM_VALUE_HELPER_INVOCATION: 5433 return TGSI_SEMANTIC_HELPER_INVOCATION; 5434 5435 /* Tessellation shader */ 5436 case SYSTEM_VALUE_TESS_COORD: 5437 return TGSI_SEMANTIC_TESSCOORD; 5438 case SYSTEM_VALUE_VERTICES_IN: 5439 return TGSI_SEMANTIC_VERTICESIN; 5440 case SYSTEM_VALUE_PRIMITIVE_ID: 5441 return TGSI_SEMANTIC_PRIMID; 5442 case SYSTEM_VALUE_TESS_LEVEL_OUTER: 5443 return TGSI_SEMANTIC_TESSOUTER; 5444 case SYSTEM_VALUE_TESS_LEVEL_INNER: 5445 return TGSI_SEMANTIC_TESSINNER; 5446 5447 /* Compute shader */ 5448 case SYSTEM_VALUE_LOCAL_INVOCATION_ID: 5449 return TGSI_SEMANTIC_THREAD_ID; 5450 case SYSTEM_VALUE_WORK_GROUP_ID: 5451 return TGSI_SEMANTIC_BLOCK_ID; 5452 case SYSTEM_VALUE_NUM_WORK_GROUPS: 5453 return TGSI_SEMANTIC_GRID_SIZE; 5454 case SYSTEM_VALUE_LOCAL_GROUP_SIZE: 5455 return TGSI_SEMANTIC_BLOCK_SIZE; 5456 5457 /* ARB_shader_ballot */ 5458 case SYSTEM_VALUE_SUBGROUP_SIZE: 5459 return TGSI_SEMANTIC_SUBGROUP_SIZE; 5460 case SYSTEM_VALUE_SUBGROUP_INVOCATION: 5461 return TGSI_SEMANTIC_SUBGROUP_INVOCATION; 5462 case SYSTEM_VALUE_SUBGROUP_EQ_MASK: 5463 return TGSI_SEMANTIC_SUBGROUP_EQ_MASK; 5464 case SYSTEM_VALUE_SUBGROUP_GE_MASK: 5465 return TGSI_SEMANTIC_SUBGROUP_GE_MASK; 5466 case SYSTEM_VALUE_SUBGROUP_GT_MASK: 5467 return TGSI_SEMANTIC_SUBGROUP_GT_MASK; 5468 case SYSTEM_VALUE_SUBGROUP_LE_MASK: 
5469 return TGSI_SEMANTIC_SUBGROUP_LE_MASK;
5470 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
5471 return TGSI_SEMANTIC_SUBGROUP_LT_MASK;
5472
5473 /* Unhandled */
5474 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
5475 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
5476 case SYSTEM_VALUE_VERTEX_CNT:
5477 default:
5478 assert(!"Unexpected SYSTEM_VALUE_ enum");
5479 return TGSI_SEMANTIC_COUNT;
5480 }
5481 }
5482
5483 /**
5484 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
5485 */
5486 static struct ureg_src
5487 emit_immediate(struct st_translate *t,
5488 gl_constant_value values[4],
5489 int type, int size)
5490 {
5491 struct ureg_program *ureg = t->ureg;
5492
5493 switch(type)
5494 {
5495 case GL_FLOAT:
5496 return ureg_DECL_immediate(ureg, &values[0].f, size);
5497 case GL_DOUBLE:
5498 return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
5499 case GL_INT64_ARB:
5500 return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size);
5501 case GL_UNSIGNED_INT64_ARB:
5502 return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size);
5503 case GL_INT:
5504 return ureg_DECL_immediate_int(ureg, &values[0].i, size);
5505 case GL_UNSIGNED_INT:
5506 case GL_BOOL:
5507 return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
5508 default:
5509 assert(!"should not get here - type must be float, double, int64, uint64, int, uint, or bool");
5510 return ureg_src_undef();
5511 }
5512 }
5513
5514 /**
5515 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
5516 */
5517 static struct ureg_dst
5518 dst_register(struct st_translate *t, gl_register_file file, unsigned index,
5519 unsigned array_id)
5520 {
5521 unsigned array;
5522
5523 switch(file) {
5524 case PROGRAM_UNDEFINED:
5525 return ureg_dst_undef();
5526
5527 case PROGRAM_TEMPORARY:
5528 /* Allocate space for temporaries on demand.
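* The backing array grows in 4096-entry increments so that a shader with
* many temporaries does not pay for a realloc on every new register.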
*/ 5529 if (index >= t->temps_size) { 5530 const int inc = align(index - t->temps_size + 1, 4096); 5531 5532 t->temps = (struct ureg_dst*) 5533 realloc(t->temps, 5534 (t->temps_size + inc) * sizeof(struct ureg_dst)); 5535 if (!t->temps) 5536 return ureg_dst_undef(); 5537 5538 memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst)); 5539 t->temps_size += inc; 5540 } 5541 5542 if (ureg_dst_is_undef(t->temps[index])) 5543 t->temps[index] = ureg_DECL_local_temporary(t->ureg); 5544 5545 return t->temps[index]; 5546 5547 case PROGRAM_ARRAY: 5548 assert(array_id && array_id <= t->num_temp_arrays); 5549 array = array_id - 1; 5550 5551 if (ureg_dst_is_undef(t->arrays[array])) 5552 t->arrays[array] = ureg_DECL_array_temporary( 5553 t->ureg, t->array_sizes[array], TRUE); 5554 5555 return ureg_dst_array_offset(t->arrays[array], index); 5556 5557 case PROGRAM_OUTPUT: 5558 if (!array_id) { 5559 if (t->procType == PIPE_SHADER_FRAGMENT) 5560 assert(index < 2 * FRAG_RESULT_MAX); 5561 else if (t->procType == PIPE_SHADER_TESS_CTRL || 5562 t->procType == PIPE_SHADER_TESS_EVAL) 5563 assert(index < VARYING_SLOT_TESS_MAX); 5564 else 5565 assert(index < VARYING_SLOT_MAX); 5566 5567 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); 5568 assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL); 5569 return t->outputs[t->outputMapping[index]]; 5570 } 5571 else { 5572 struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id); 5573 unsigned mesa_index = decl->mesa_index; 5574 int slot = t->outputMapping[mesa_index]; 5575 5576 assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT); 5577 5578 struct ureg_dst dst = t->outputs[slot]; 5579 dst.ArrayID = array_id; 5580 return ureg_dst_array_offset(dst, index - mesa_index); 5581 } 5582 5583 case PROGRAM_ADDRESS: 5584 return t->address[index]; 5585 5586 default: 5587 assert(!"unknown dst register file"); 5588 return ureg_dst_undef(); 5589 } 5590 } 5591 5592 static struct ureg_src 5593 translate_src(struct st_translate *t, const st_src_reg *src_reg); 5594 5595 static struct ureg_src 5596 translate_addr(struct st_translate *t, const st_src_reg *reladdr, 5597 unsigned addr_index) 5598 { 5599 if (t->need_uarl || !reladdr->is_legal_tgsi_address_operand()) 5600 return ureg_src(t->address[addr_index]); 5601 5602 return translate_src(t, reladdr); 5603 } 5604 5605 /** 5606 * Create a TGSI ureg_dst register from an st_dst_reg. 5607 */ 5608 static struct ureg_dst 5609 translate_dst(struct st_translate *t, 5610 const st_dst_reg *dst_reg, 5611 bool saturate) 5612 { 5613 struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index, 5614 dst_reg->array_id); 5615 5616 if (dst.File == TGSI_FILE_NULL) 5617 return dst; 5618 5619 dst = ureg_writemask(dst, dst_reg->writemask); 5620 5621 if (saturate) 5622 dst = ureg_saturate(dst); 5623 5624 if (dst_reg->reladdr != NULL) { 5625 assert(dst_reg->file != PROGRAM_TEMPORARY); 5626 dst = ureg_dst_indirect(dst, translate_addr(t, dst_reg->reladdr, 0)); 5627 } 5628 5629 if (dst_reg->has_index2) { 5630 if (dst_reg->reladdr2) 5631 dst = ureg_dst_dimension_indirect(dst, 5632 translate_addr(t, dst_reg->reladdr2, 1), 5633 dst_reg->index2D); 5634 else 5635 dst = ureg_dst_dimension(dst, dst_reg->index2D); 5636 } 5637 5638 return dst; 5639 } 5640 5641 /** 5642 * Create a TGSI ureg_src register from an st_src_reg. 
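*
* The base register is resolved from its register file first; the swizzle,
* absolute-value, negate and relative-addressing modifiers are then layered
* on top of the result.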
5643 */ 5644 static struct ureg_src 5645 translate_src(struct st_translate *t, const st_src_reg *src_reg) 5646 { 5647 struct ureg_src src; 5648 int index = src_reg->index; 5649 int double_reg2 = src_reg->double_reg2 ? 1 : 0; 5650 5651 switch(src_reg->file) { 5652 case PROGRAM_UNDEFINED: 5653 src = ureg_imm4f(t->ureg, 0, 0, 0, 0); 5654 break; 5655 5656 case PROGRAM_TEMPORARY: 5657 case PROGRAM_ARRAY: 5658 src = ureg_src(dst_register(t, src_reg->file, src_reg->index, src_reg->array_id)); 5659 break; 5660 5661 case PROGRAM_OUTPUT: { 5662 struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index, src_reg->array_id); 5663 assert(dst.WriteMask != 0); 5664 unsigned shift = ffs(dst.WriteMask) - 1; 5665 src = ureg_swizzle(ureg_src(dst), 5666 shift, 5667 MIN2(shift + 1, 3), 5668 MIN2(shift + 2, 3), 5669 MIN2(shift + 3, 3)); 5670 break; 5671 } 5672 5673 case PROGRAM_UNIFORM: 5674 assert(src_reg->index >= 0); 5675 src = src_reg->index < t->num_constants ? 5676 t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0); 5677 break; 5678 case PROGRAM_STATE_VAR: 5679 case PROGRAM_CONSTANT: /* ie, immediate */ 5680 if (src_reg->has_index2) 5681 src = ureg_src_register(TGSI_FILE_CONSTANT, src_reg->index); 5682 else 5683 src = src_reg->index >= 0 && src_reg->index < t->num_constants ? 5684 t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0); 5685 break; 5686 5687 case PROGRAM_IMMEDIATE: 5688 assert(src_reg->index >= 0 && src_reg->index < t->num_immediates); 5689 src = t->immediates[src_reg->index]; 5690 break; 5691 5692 case PROGRAM_INPUT: 5693 /* GLSL inputs are 64-bit containers, so we have to 5694 * map back to the original index and add the offset after 5695 * mapping. */ 5696 index -= double_reg2; 5697 if (!src_reg->array_id) { 5698 assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); 5699 assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL); 5700 src = t->inputs[t->inputMapping[index] + double_reg2]; 5701 } 5702 else { 5703 struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls, 5704 src_reg->array_id); 5705 unsigned mesa_index = decl->mesa_index; 5706 int slot = t->inputMapping[mesa_index]; 5707 5708 assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT); 5709 5710 src = t->inputs[slot]; 5711 src.ArrayID = src_reg->array_id; 5712 src = ureg_src_array_offset(src, index + double_reg2 - mesa_index); 5713 } 5714 break; 5715 5716 case PROGRAM_ADDRESS: 5717 src = ureg_src(t->address[src_reg->index]); 5718 break; 5719 5720 case PROGRAM_SYSTEM_VALUE: 5721 assert(src_reg->index < (int) ARRAY_SIZE(t->systemValues)); 5722 src = t->systemValues[src_reg->index]; 5723 break; 5724 5725 case PROGRAM_HW_ATOMIC: 5726 src = ureg_src_array_register(TGSI_FILE_HW_ATOMIC, src_reg->index, 5727 src_reg->array_id); 5728 break; 5729 5730 default: 5731 assert(!"unknown src register file"); 5732 return ureg_src_undef(); 5733 } 5734 5735 if (src_reg->has_index2) { 5736 /* 2D indexes occur with geometry shader inputs (attrib, vertex) 5737 * and UBO constant buffers (buffer, position). 
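* For example, CONST[1][3] in TGSI's two-dimensional syntax would select
* vec4 slot 3 of constant buffer 1.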
5738 */ 5739 if (src_reg->reladdr2) 5740 src = ureg_src_dimension_indirect(src, 5741 translate_addr(t, src_reg->reladdr2, 1), 5742 src_reg->index2D); 5743 else 5744 src = ureg_src_dimension(src, src_reg->index2D); 5745 } 5746 5747 src = ureg_swizzle(src, 5748 GET_SWZ(src_reg->swizzle, 0) & 0x3, 5749 GET_SWZ(src_reg->swizzle, 1) & 0x3, 5750 GET_SWZ(src_reg->swizzle, 2) & 0x3, 5751 GET_SWZ(src_reg->swizzle, 3) & 0x3); 5752 5753 if (src_reg->abs) 5754 src = ureg_abs(src); 5755 5756 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 5757 src = ureg_negate(src); 5758 5759 if (src_reg->reladdr != NULL) { 5760 assert(src_reg->file != PROGRAM_TEMPORARY); 5761 src = ureg_src_indirect(src, translate_addr(t, src_reg->reladdr, 0)); 5762 } 5763 5764 return src; 5765 } 5766 5767 static struct tgsi_texture_offset 5768 translate_tex_offset(struct st_translate *t, 5769 const st_src_reg *in_offset) 5770 { 5771 struct tgsi_texture_offset offset; 5772 struct ureg_src src = translate_src(t, in_offset); 5773 5774 offset.File = src.File; 5775 offset.Index = src.Index; 5776 offset.SwizzleX = src.SwizzleX; 5777 offset.SwizzleY = src.SwizzleY; 5778 offset.SwizzleZ = src.SwizzleZ; 5779 offset.Padding = 0; 5780 5781 assert(!src.Indirect); 5782 assert(!src.DimIndirect); 5783 assert(!src.Dimension); 5784 assert(!src.Absolute); /* those shouldn't be used with integers anyway */ 5785 assert(!src.Negate); 5786 5787 return offset; 5788 } 5789 5790 static void 5791 compile_tgsi_instruction(struct st_translate *t, 5792 const glsl_to_tgsi_instruction *inst) 5793 { 5794 struct ureg_program *ureg = t->ureg; 5795 int i; 5796 struct ureg_dst dst[2]; 5797 struct ureg_src src[4]; 5798 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; 5799 5800 int num_dst; 5801 int num_src; 5802 unsigned tex_target = 0; 5803 5804 num_dst = num_inst_dst_regs(inst); 5805 num_src = num_inst_src_regs(inst); 5806 5807 for (i = 0; i < num_dst; i++) 5808 dst[i] = translate_dst(t, 5809 &inst->dst[i], 5810 inst->saturate); 5811 5812 for (i = 0; i < num_src; i++) 5813 src[i] = translate_src(t, &inst->src[i]); 5814 5815 switch(inst->op) { 5816 case TGSI_OPCODE_BGNLOOP: 5817 case TGSI_OPCODE_ELSE: 5818 case TGSI_OPCODE_ENDLOOP: 5819 case TGSI_OPCODE_IF: 5820 case TGSI_OPCODE_UIF: 5821 assert(num_dst == 0); 5822 ureg_insn(ureg, inst->op, NULL, 0, src, num_src, inst->precise); 5823 return; 5824 5825 case TGSI_OPCODE_TEX: 5826 case TGSI_OPCODE_TEX_LZ: 5827 case TGSI_OPCODE_TXB: 5828 case TGSI_OPCODE_TXD: 5829 case TGSI_OPCODE_TXL: 5830 case TGSI_OPCODE_TXP: 5831 case TGSI_OPCODE_TXQ: 5832 case TGSI_OPCODE_TXQS: 5833 case TGSI_OPCODE_TXF: 5834 case TGSI_OPCODE_TXF_LZ: 5835 case TGSI_OPCODE_TEX2: 5836 case TGSI_OPCODE_TXB2: 5837 case TGSI_OPCODE_TXL2: 5838 case TGSI_OPCODE_TG4: 5839 case TGSI_OPCODE_LODQ: 5840 if (inst->resource.file == PROGRAM_SAMPLER) { 5841 src[num_src] = t->samplers[inst->resource.index]; 5842 } else { 5843 /* Bindless samplers. 
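* In that case the resource register carries the 64-bit sampler handle
* itself rather than an index into the bound-sampler table.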
*/ 5844 src[num_src] = translate_src(t, &inst->resource); 5845 } 5846 assert(src[num_src].File != TGSI_FILE_NULL); 5847 if (inst->resource.reladdr) 5848 src[num_src] = 5849 ureg_src_indirect(src[num_src], 5850 translate_addr(t, inst->resource.reladdr, 2)); 5851 num_src++; 5852 for (i = 0; i < (int)inst->tex_offset_num_offset; i++) { 5853 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); 5854 } 5855 tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); 5856 5857 ureg_tex_insn(ureg, 5858 inst->op, 5859 dst, num_dst, 5860 tex_target, 5861 st_translate_texture_type(inst->tex_type), 5862 texoffsets, inst->tex_offset_num_offset, 5863 src, num_src); 5864 return; 5865 5866 case TGSI_OPCODE_RESQ: 5867 case TGSI_OPCODE_LOAD: 5868 case TGSI_OPCODE_ATOMUADD: 5869 case TGSI_OPCODE_ATOMXCHG: 5870 case TGSI_OPCODE_ATOMCAS: 5871 case TGSI_OPCODE_ATOMAND: 5872 case TGSI_OPCODE_ATOMOR: 5873 case TGSI_OPCODE_ATOMXOR: 5874 case TGSI_OPCODE_ATOMUMIN: 5875 case TGSI_OPCODE_ATOMUMAX: 5876 case TGSI_OPCODE_ATOMIMIN: 5877 case TGSI_OPCODE_ATOMIMAX: 5878 for (i = num_src - 1; i >= 0; i--) 5879 src[i + 1] = src[i]; 5880 num_src++; 5881 if (inst->resource.file == PROGRAM_MEMORY) { 5882 src[0] = t->shared_memory; 5883 } else if (inst->resource.file == PROGRAM_BUFFER) { 5884 src[0] = t->buffers[inst->resource.index]; 5885 } else if (inst->resource.file == PROGRAM_HW_ATOMIC) { 5886 src[0] = translate_src(t, &inst->resource); 5887 } else if (inst->resource.file == PROGRAM_CONSTANT) { 5888 assert(inst->resource.has_index2); 5889 src[0] = ureg_src_register(TGSI_FILE_CONSTBUF, inst->resource.index); 5890 } else { 5891 assert(inst->resource.file != PROGRAM_UNDEFINED); 5892 if (inst->resource.file == PROGRAM_IMAGE) { 5893 src[0] = t->images[inst->resource.index]; 5894 } else { 5895 /* Bindless images. */ 5896 src[0] = translate_src(t, &inst->resource); 5897 } 5898 tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); 5899 } 5900 if (inst->resource.reladdr) 5901 src[0] = ureg_src_indirect(src[0], 5902 translate_addr(t, inst->resource.reladdr, 2)); 5903 assert(src[0].File != TGSI_FILE_NULL); 5904 ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, 5905 inst->buffer_access, 5906 tex_target, inst->image_format); 5907 break; 5908 5909 case TGSI_OPCODE_STORE: 5910 if (inst->resource.file == PROGRAM_MEMORY) { 5911 dst[0] = ureg_dst(t->shared_memory); 5912 } else if (inst->resource.file == PROGRAM_BUFFER) { 5913 dst[0] = ureg_dst(t->buffers[inst->resource.index]); 5914 } else { 5915 if (inst->resource.file == PROGRAM_IMAGE) { 5916 dst[0] = ureg_dst(t->images[inst->resource.index]); 5917 } else { 5918 /* Bindless images. */ 5919 dst[0] = ureg_dst(translate_src(t, &inst->resource)); 5920 } 5921 tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); 5922 } 5923 dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask); 5924 if (inst->resource.reladdr) 5925 dst[0] = ureg_dst_indirect(dst[0], 5926 translate_addr(t, inst->resource.reladdr, 2)); 5927 assert(dst[0].File != TGSI_FILE_NULL); 5928 ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, 5929 inst->buffer_access, 5930 tex_target, inst->image_format); 5931 break; 5932 5933 default: 5934 ureg_insn(ureg, 5935 inst->op, 5936 dst, num_dst, 5937 src, num_src, inst->precise); 5938 break; 5939 } 5940 } 5941 5942 /** 5943 * Emit the TGSI instructions for inverting and adjusting WPOS. 
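* The flip itself is a single MAD on the Y channel, y' = y * scale + bias,
* with (scale, bias) read from either .xy or .zw of the
* STATE_FB_WPOS_Y_TRANSFORM constant. As a sketch, for a 100-pixel-high
* drawable the inverting pair would be (-1, 100), mapping y = 0.5 to 99.5,
* while the identity pair is (1, 0).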
5944 * This code is unavoidable because it also depends on whether
5945 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
5946 */
5947 static void
5948 emit_wpos_adjustment(struct gl_context *ctx,
5949 struct st_translate *t,
5950 int wpos_transform_const,
5951 boolean invert,
5952 GLfloat adjX, GLfloat adjY[2])
5953 {
5954 struct ureg_program *ureg = t->ureg;
5955
5956 assert(wpos_transform_const >= 0);
5957
5958 /* Fragment program uses fragment position input.
5959 * Need to replace instances of INPUT[WPOS] with temp T
5960 * where T = INPUT[WPOS] with the Y coordinate inverted.
5961 */
5962 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
5963 struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
5964 struct ureg_src *wpos =
5965 ctx->Const.GLSLFragCoordIsSysVal ?
5966 &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
5967 &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
5968 struct ureg_src wpos_input = *wpos;
5969
5970 /* First, apply the coordinate shift: */
5971 if (adjX || adjY[0] || adjY[1]) {
5972 if (adjY[0] != adjY[1]) {
5973 /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
5974 * depending on whether inversion is actually going to be applied
5975 * or not, which is determined by testing against the inversion
5976 * state variable used below, which will be either +1 or -1.
5977 */
5978 struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
5979
5980 ureg_CMP(ureg, adj_temp,
5981 ureg_scalar(wpostrans, invert ? 2 : 0),
5982 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
5983 ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
5984 ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
5985 } else {
5986 ureg_ADD(ureg, wpos_temp, wpos_input,
5987 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
5988 }
5989 wpos_input = ureg_src(wpos_temp);
5990 } else {
5991 /* MOV wpos_temp, input[wpos]
5992 */
5993 ureg_MOV( ureg, wpos_temp, wpos_input );
5994 }
5995
5996 /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
5997 * inversion/identity, or the other way around if we're drawing to an FBO.
5998 */
5999 if (invert) {
6000 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
6001 */
6002 ureg_MAD( ureg,
6003 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
6004 wpos_input,
6005 ureg_scalar(wpostrans, 0),
6006 ureg_scalar(wpostrans, 1));
6007 } else {
6008 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
6009 */
6010 ureg_MAD( ureg,
6011 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
6012 wpos_input,
6013 ureg_scalar(wpostrans, 2),
6014 ureg_scalar(wpostrans, 3));
6015 }
6016
6017 /* Use wpos_temp as position input from here on:
6018 */
6019 *wpos = ureg_src(wpos_temp);
6020 }
6021
6022
6023 /**
6024 * Emit fragment position/coordinate code.
6025 */
6026 static void
6027 emit_wpos(struct st_context *st,
6028 struct st_translate *t,
6029 const struct gl_program *program,
6030 struct ureg_program *ureg,
6031 int wpos_transform_const)
6032 {
6033 struct pipe_screen *pscreen = st->pipe->screen;
6034 GLfloat adjX = 0.0f;
6035 GLfloat adjY[2] = { 0.0f, 0.0f };
6036 boolean invert = FALSE;
6037
6038 /* Query the pixel center conventions supported by the pipe driver and set
6039 * adjX, adjY to help out if it cannot handle the requested one internally.
6040 * 6041 * The bias of the y-coordinate depends on whether y-inversion takes place 6042 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are 6043 * drawing to an FBO (causes additional inversion), and whether the pipe 6044 * driver origin and the requested origin differ (the latter condition is 6045 * stored in the 'invert' variable). 6046 * 6047 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): 6048 * 6049 * center shift only: 6050 * i -> h: +0.5 6051 * h -> i: -0.5 6052 * 6053 * inversion only: 6054 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 6055 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 6056 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 6057 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 6058 * 6059 * inversion and center shift: 6060 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 6061 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 6062 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 6063 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 6064 */ 6065 if (program->OriginUpperLeft) { 6066 /* Fragment shader wants origin in upper-left */ 6067 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 6068 /* the driver supports upper-left origin */ 6069 } 6070 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 6071 /* the driver supports lower-left origin, need to invert Y */ 6072 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, 6073 TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 6074 invert = TRUE; 6075 } 6076 else 6077 assert(0); 6078 } 6079 else { 6080 /* Fragment shader wants origin in lower-left */ 6081 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 6082 /* the driver supports lower-left origin */ 6083 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, 6084 TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 6085 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 6086 /* the driver supports upper-left origin, need to invert Y */ 6087 invert = TRUE; 6088 else 6089 assert(0); 6090 } 6091 6092 if (program->PixelCenterInteger) { 6093 /* Fragment shader wants pixel center integer */ 6094 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 6095 /* the driver supports pixel center integer */ 6096 adjY[1] = 1.0f; 6097 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 6098 TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 6099 } 6100 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 6101 /* the driver supports pixel center half integer, need to bias X,Y */ 6102 adjX = -0.5f; 6103 adjY[0] = -0.5f; 6104 adjY[1] = 0.5f; 6105 } 6106 else 6107 assert(0); 6108 } 6109 else { 6110 /* Fragment shader wants pixel center half integer */ 6111 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 6112 /* the driver supports pixel center half integer */ 6113 } 6114 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 6115 /* the driver supports pixel center integer, need to bias X,Y */ 6116 adjX = adjY[0] = adjY[1] = 0.5f; 6117 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 6118 TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 6119 } 6120 else 6121 assert(0); 6122 } 6123 6124 /* we invert after adjustment so that we avoid the MOV to temporary, 6125 * and reuse the adjustment ADD instead */ 6126 emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY); 6127 } 6128 6129 /** 6130 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 
6131 * TGSI uses +1 for front, -1 for back. 6132 * This function converts the TGSI value to the GL value. Simply clamping/ 6133 * saturating the value to [0,1] does the job. 6134 */ 6135 static void 6136 emit_face_var(struct gl_context *ctx, struct st_translate *t) 6137 { 6138 struct ureg_program *ureg = t->ureg; 6139 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 6140 struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]]; 6141 6142 if (ctx->Const.NativeIntegers) { 6143 ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0)); 6144 } 6145 else { 6146 /* MOV_SAT face_temp, input[face] */ 6147 ureg_MOV(ureg, ureg_saturate(face_temp), face_input); 6148 } 6149 6150 /* Use face_temp as face input from here on: */ 6151 t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp); 6152 } 6153 6154 static void 6155 emit_compute_block_size(const struct gl_program *prog, 6156 struct ureg_program *ureg) { 6157 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH, 6158 prog->info.cs.local_size[0]); 6159 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT, 6160 prog->info.cs.local_size[1]); 6161 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH, 6162 prog->info.cs.local_size[2]); 6163 } 6164 6165 struct sort_inout_decls { 6166 bool operator()(const struct inout_decl &a, const struct inout_decl &b) const { 6167 return mapping[a.mesa_index] < mapping[b.mesa_index]; 6168 } 6169 6170 const ubyte *mapping; 6171 }; 6172 6173 /* Sort the given array of decls by the corresponding slot (TGSI file index). 6174 * 6175 * This is for the benefit of older drivers which are broken when the 6176 * declarations aren't sorted in this way. 6177 */ 6178 static void 6179 sort_inout_decls_by_slot(struct inout_decl *decls, 6180 unsigned count, 6181 const ubyte mapping[]) 6182 { 6183 sort_inout_decls sorter; 6184 sorter.mapping = mapping; 6185 std::sort(decls, decls + count, sorter); 6186 } 6187 6188 static unsigned 6189 st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying) 6190 { 6191 switch (glsl_qual) { 6192 case INTERP_MODE_NONE: 6193 if (varying == VARYING_SLOT_COL0 || varying == VARYING_SLOT_COL1) 6194 return TGSI_INTERPOLATE_COLOR; 6195 return TGSI_INTERPOLATE_PERSPECTIVE; 6196 case INTERP_MODE_SMOOTH: 6197 return TGSI_INTERPOLATE_PERSPECTIVE; 6198 case INTERP_MODE_FLAT: 6199 return TGSI_INTERPOLATE_CONSTANT; 6200 case INTERP_MODE_NOPERSPECTIVE: 6201 return TGSI_INTERPOLATE_LINEAR; 6202 default: 6203 assert(0 && "unexpected interp mode in st_translate_interp()"); 6204 return TGSI_INTERPOLATE_PERSPECTIVE; 6205 } 6206 } 6207 6208 /** 6209 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 
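*
* This declares all inputs, outputs, constants, immediates, samplers and
* buffers up front, then walks the visitor's instruction list once,
* translating each glsl_to_tgsi_instruction via compile_tgsi_instruction().
*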
6210 * \param program the program to translate
6211 * \param numInputs number of input registers used
6212 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
6213 * input indexes
6214 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
6215 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
6216 * each input
6217 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
6218 * \param numOutputs number of output registers used
6219 * \param outputMapping maps Mesa fragment program outputs to TGSI
6220 * generic outputs
6221 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
6222 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
6223 * each output
6224 *
6225 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
6226 */
6227 extern "C" enum pipe_error
6228 st_translate_program(
6229 struct gl_context *ctx,
6230 uint procType,
6231 struct ureg_program *ureg,
6232 glsl_to_tgsi_visitor *program,
6233 const struct gl_program *proginfo,
6234 GLuint numInputs,
6235 const ubyte inputMapping[],
6236 const ubyte inputSlotToAttr[],
6237 const ubyte inputSemanticName[],
6238 const ubyte inputSemanticIndex[],
6239 const ubyte interpMode[],
6240 GLuint numOutputs,
6241 const ubyte outputMapping[],
6242 const ubyte outputSemanticName[],
6243 const ubyte outputSemanticIndex[])
6244 {
6245 struct pipe_screen *screen = st_context(ctx)->pipe->screen;
6246 struct st_translate *t;
6247 unsigned i;
6248 struct gl_program_constants *frag_const =
6249 &ctx->Const.Program[MESA_SHADER_FRAGMENT];
6250 enum pipe_error ret = PIPE_OK;
6251
6252 assert(numInputs <= ARRAY_SIZE(t->inputs));
6253 assert(numOutputs <= ARRAY_SIZE(t->outputs));
6254
6255 ASSERT_BITFIELD_SIZE(st_src_reg, type, GLSL_TYPE_ERROR);
6256 ASSERT_BITFIELD_SIZE(st_dst_reg, type, GLSL_TYPE_ERROR);
6257 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_type, GLSL_TYPE_ERROR);
6259 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_target,
6260 (gl_texture_index) (NUM_TEXTURE_TARGETS - 1));
6261 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format,
6262 (enum pipe_format) (PIPE_FORMAT_COUNT - 1));
6263 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, op, TGSI_OPCODE_LAST - 1);
6264
6265 t = CALLOC_STRUCT(st_translate);
6266 if (!t) {
6267 ret = PIPE_ERROR_OUT_OF_MEMORY;
6268 goto out;
6269 }
6270
6271 t->procType = procType;
6272 t->need_uarl = !screen->get_param(screen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS);
6273 t->inputMapping = inputMapping;
6274 t->outputMapping = outputMapping;
6275 t->ureg = ureg;
6276 t->num_temp_arrays = program->next_array;
6277 if (t->num_temp_arrays)
6278 t->arrays = (struct ureg_dst*)
6279 calloc(t->num_temp_arrays, sizeof(t->arrays[0]));
6280
6281 /*
6282 * Declare input attributes.
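*
* Note that for 64-bit base types each GLSL component occupies two TGSI
* channels, which is why the usage mask is widened to XY/ZW/XYZW below.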
6283 */ 6284 switch (procType) { 6285 case PIPE_SHADER_FRAGMENT: 6286 case PIPE_SHADER_GEOMETRY: 6287 case PIPE_SHADER_TESS_EVAL: 6288 case PIPE_SHADER_TESS_CTRL: 6289 sort_inout_decls_by_slot(program->inputs, program->num_inputs, inputMapping); 6290 6291 for (i = 0; i < program->num_inputs; ++i) { 6292 struct inout_decl *decl = &program->inputs[i]; 6293 unsigned slot = inputMapping[decl->mesa_index]; 6294 struct ureg_src src; 6295 ubyte tgsi_usage_mask = decl->usage_mask; 6296 6297 if (glsl_base_type_is_64bit(decl->base_type)) { 6298 if (tgsi_usage_mask == 1) 6299 tgsi_usage_mask = TGSI_WRITEMASK_XY; 6300 else if (tgsi_usage_mask == 2) 6301 tgsi_usage_mask = TGSI_WRITEMASK_ZW; 6302 else 6303 tgsi_usage_mask = TGSI_WRITEMASK_XYZW; 6304 } 6305 6306 unsigned interp_mode = 0; 6307 unsigned interp_location = 0; 6308 if (procType == PIPE_SHADER_FRAGMENT) { 6309 assert(interpMode); 6310 interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ? 6311 interpMode[slot] : 6312 st_translate_interp(decl->interp, inputSlotToAttr[slot]); 6313 6314 interp_location = decl->interp_loc; 6315 } 6316 6317 src = ureg_DECL_fs_input_cyl_centroid_layout(ureg, 6318 inputSemanticName[slot], inputSemanticIndex[slot], 6319 interp_mode, 0, interp_location, slot, tgsi_usage_mask, 6320 decl->array_id, decl->size); 6321 6322 for (unsigned j = 0; j < decl->size; ++j) { 6323 if (t->inputs[slot + j].File != TGSI_FILE_INPUT) { 6324 /* The ArrayID is set up in dst_register */ 6325 t->inputs[slot + j] = src; 6326 t->inputs[slot + j].ArrayID = 0; 6327 t->inputs[slot + j].Index += j; 6328 } 6329 } 6330 } 6331 break; 6332 case PIPE_SHADER_VERTEX: 6333 for (i = 0; i < numInputs; i++) { 6334 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 6335 } 6336 break; 6337 case PIPE_SHADER_COMPUTE: 6338 break; 6339 default: 6340 assert(0); 6341 } 6342 6343 /* 6344 * Declare output attributes. 
6345 */ 6346 switch (procType) { 6347 case PIPE_SHADER_FRAGMENT: 6348 case PIPE_SHADER_COMPUTE: 6349 break; 6350 case PIPE_SHADER_GEOMETRY: 6351 case PIPE_SHADER_TESS_EVAL: 6352 case PIPE_SHADER_TESS_CTRL: 6353 case PIPE_SHADER_VERTEX: 6354 sort_inout_decls_by_slot(program->outputs, program->num_outputs, outputMapping); 6355 6356 for (i = 0; i < program->num_outputs; ++i) { 6357 struct inout_decl *decl = &program->outputs[i]; 6358 unsigned slot = outputMapping[decl->mesa_index]; 6359 struct ureg_dst dst; 6360 ubyte tgsi_usage_mask = decl->usage_mask; 6361 6362 if (glsl_base_type_is_64bit(decl->base_type)) { 6363 if (tgsi_usage_mask == 1) 6364 tgsi_usage_mask = TGSI_WRITEMASK_XY; 6365 else if (tgsi_usage_mask == 2) 6366 tgsi_usage_mask = TGSI_WRITEMASK_ZW; 6367 else 6368 tgsi_usage_mask = TGSI_WRITEMASK_XYZW; 6369 } 6370 6371 dst = ureg_DECL_output_layout(ureg, 6372 outputSemanticName[slot], outputSemanticIndex[slot], 6373 decl->gs_out_streams, 6374 slot, tgsi_usage_mask, decl->array_id, decl->size); 6375 6376 for (unsigned j = 0; j < decl->size; ++j) { 6377 if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) { 6378 /* The ArrayID is set up in dst_register */ 6379 t->outputs[slot + j] = dst; 6380 t->outputs[slot + j].ArrayID = 0; 6381 t->outputs[slot + j].Index += j; 6382 } 6383 } 6384 } 6385 break; 6386 default: 6387 assert(0); 6388 } 6389 6390 if (procType == PIPE_SHADER_FRAGMENT) { 6391 if (program->shader->Program->info.fs.early_fragment_tests || 6392 program->shader->Program->info.fs.post_depth_coverage) { 6393 ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1); 6394 6395 if (program->shader->Program->info.fs.post_depth_coverage) 6396 ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1); 6397 } 6398 6399 if (proginfo->info.inputs_read & VARYING_BIT_POS) { 6400 /* Must do this after setting up t->inputs. */ 6401 emit_wpos(st_context(ctx), t, proginfo, ureg, 6402 program->wpos_transform_const); 6403 } 6404 6405 if (proginfo->info.inputs_read & VARYING_BIT_FACE) 6406 emit_face_var(ctx, t); 6407 6408 for (i = 0; i < numOutputs; i++) { 6409 switch (outputSemanticName[i]) { 6410 case TGSI_SEMANTIC_POSITION: 6411 t->outputs[i] = ureg_DECL_output(ureg, 6412 TGSI_SEMANTIC_POSITION, /* Z/Depth */ 6413 outputSemanticIndex[i]); 6414 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); 6415 break; 6416 case TGSI_SEMANTIC_STENCIL: 6417 t->outputs[i] = ureg_DECL_output(ureg, 6418 TGSI_SEMANTIC_STENCIL, /* Stencil */ 6419 outputSemanticIndex[i]); 6420 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); 6421 break; 6422 case TGSI_SEMANTIC_COLOR: 6423 t->outputs[i] = ureg_DECL_output(ureg, 6424 TGSI_SEMANTIC_COLOR, 6425 outputSemanticIndex[i]); 6426 break; 6427 case TGSI_SEMANTIC_SAMPLEMASK: 6428 t->outputs[i] = ureg_DECL_output(ureg, 6429 TGSI_SEMANTIC_SAMPLEMASK, 6430 outputSemanticIndex[i]); 6431 /* TODO: If we ever support more than 32 samples, this will have 6432 * to become an array. 6433 */ 6434 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); 6435 break; 6436 default: 6437 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); 6438 ret = PIPE_ERROR_BAD_INPUT; 6439 goto out; 6440 } 6441 } 6442 } 6443 else if (procType == PIPE_SHADER_VERTEX) { 6444 for (i = 0; i < numOutputs; i++) { 6445 if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) { 6446 /* force register to contain a fog coordinate in the form (F, 0, 0, 1). 
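* The MOV below writes the constant .yzw lanes once, and the output is then
* restricted to .x so that later writes only touch the fog value.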
*/ 6447 ureg_MOV(ureg, 6448 ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW), 6449 ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 6450 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); 6451 } 6452 } 6453 } 6454 6455 if (procType == PIPE_SHADER_COMPUTE) { 6456 emit_compute_block_size(proginfo, ureg); 6457 } 6458 6459 /* Declare address register. 6460 */ 6461 if (program->num_address_regs > 0) { 6462 assert(program->num_address_regs <= 3); 6463 for (int i = 0; i < program->num_address_regs; i++) 6464 t->address[i] = ureg_DECL_address(ureg); 6465 } 6466 6467 /* Declare misc input registers 6468 */ 6469 { 6470 GLbitfield sysInputs = proginfo->info.system_values_read; 6471 6472 for (i = 0; sysInputs; i++) { 6473 if (sysInputs & (1 << i)) { 6474 unsigned semName = _mesa_sysval_to_semantic(i); 6475 6476 t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); 6477 6478 if (semName == TGSI_SEMANTIC_INSTANCEID || 6479 semName == TGSI_SEMANTIC_VERTEXID) { 6480 /* From Gallium perspective, these system values are always 6481 * integer, and require native integer support. However, if 6482 * native integer is supported on the vertex stage but not the 6483 * pixel stage (e.g, i915g + draw), Mesa will generate IR that 6484 * assumes these system values are floats. To resolve the 6485 * inconsistency, we insert a U2F. 6486 */ 6487 struct st_context *st = st_context(ctx); 6488 struct pipe_screen *pscreen = st->pipe->screen; 6489 assert(procType == PIPE_SHADER_VERTEX); 6490 assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS)); 6491 (void) pscreen; 6492 if (!ctx->Const.NativeIntegers) { 6493 struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg); 6494 ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]); 6495 t->systemValues[i] = ureg_scalar(ureg_src(temp), 0); 6496 } 6497 } 6498 6499 if (procType == PIPE_SHADER_FRAGMENT && 6500 semName == TGSI_SEMANTIC_POSITION) 6501 emit_wpos(st_context(ctx), t, proginfo, ureg, 6502 program->wpos_transform_const); 6503 6504 sysInputs &= ~(1 << i); 6505 } 6506 } 6507 } 6508 6509 t->array_sizes = program->array_sizes; 6510 t->input_decls = program->inputs; 6511 t->num_input_decls = program->num_inputs; 6512 t->output_decls = program->outputs; 6513 t->num_output_decls = program->num_outputs; 6514 6515 /* Emit constants and uniforms. TGSI uses a single index space for these, 6516 * so we put all the translated regs in t->constants. 6517 */ 6518 if (proginfo->Parameters) { 6519 t->constants = (struct ureg_src *) 6520 calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0])); 6521 if (t->constants == NULL) { 6522 ret = PIPE_ERROR_OUT_OF_MEMORY; 6523 goto out; 6524 } 6525 t->num_constants = proginfo->Parameters->NumParameters; 6526 6527 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 6528 switch (proginfo->Parameters->Parameters[i].Type) { 6529 case PROGRAM_STATE_VAR: 6530 case PROGRAM_UNIFORM: 6531 t->constants[i] = ureg_DECL_constant(ureg, i); 6532 break; 6533 6534 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect 6535 * addressing of the const buffer. 6536 * FIXME: Be smarter and recognize param arrays: 6537 * indirect addressing is only valid within the referenced 6538 * array. 
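* As written, a single indirectly addressed constant forces every
* PROGRAM_CONSTANT in the program into the constant buffer.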
6539 */ 6540 case PROGRAM_CONSTANT: 6541 if (program->indirect_addr_consts) 6542 t->constants[i] = ureg_DECL_constant(ureg, i); 6543 else 6544 t->constants[i] = emit_immediate(t, 6545 proginfo->Parameters->ParameterValues[i], 6546 proginfo->Parameters->Parameters[i].DataType, 6547 4); 6548 break; 6549 default: 6550 break; 6551 } 6552 } 6553 } 6554 6555 for (i = 0; i < proginfo->info.num_ubos; i++) { 6556 unsigned size = proginfo->sh.UniformBlocks[i]->UniformBufferSize; 6557 unsigned num_const_vecs = (size + 15) / 16; 6558 unsigned first, last; 6559 assert(num_const_vecs > 0); 6560 first = 0; 6561 last = num_const_vecs > 0 ? num_const_vecs - 1 : 0; 6562 ureg_DECL_constant2D(t->ureg, first, last, i + 1); 6563 } 6564 6565 /* Emit immediate values. 6566 */ 6567 t->immediates = (struct ureg_src *) 6568 calloc(program->num_immediates, sizeof(struct ureg_src)); 6569 if (t->immediates == NULL) { 6570 ret = PIPE_ERROR_OUT_OF_MEMORY; 6571 goto out; 6572 } 6573 t->num_immediates = program->num_immediates; 6574 6575 i = 0; 6576 foreach_in_list(immediate_storage, imm, &program->immediates) { 6577 assert(i < program->num_immediates); 6578 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32); 6579 } 6580 assert(i == program->num_immediates); 6581 6582 /* texture samplers */ 6583 for (i = 0; i < frag_const->MaxTextureImageUnits; i++) { 6584 if (program->samplers_used & (1u << i)) { 6585 enum tgsi_return_type type = 6586 st_translate_texture_type(program->sampler_types[i]); 6587 6588 t->samplers[i] = ureg_DECL_sampler(ureg, i); 6589 6590 ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i], 6591 type, type, type, type ); 6592 } 6593 } 6594 6595 /* Declare atomic and shader storage buffers. */ 6596 { 6597 struct gl_program *prog = program->prog; 6598 6599 if (!st_context(ctx)->has_hw_atomics) { 6600 for (i = 0; i < prog->info.num_abos; i++) { 6601 unsigned index = prog->sh.AtomicBuffers[i]->Binding; 6602 assert(index < frag_const->MaxAtomicBuffers); 6603 t->buffers[index] = ureg_DECL_buffer(ureg, index, true); 6604 } 6605 } else { 6606 for (i = 0; i < program->num_atomics; i++) { 6607 struct hwatomic_decl *ainfo = &program->atomic_info[i]; 6608 gl_uniform_storage *uni_storage = &prog->sh.data->UniformStorage[ainfo->location]; 6609 int base = uni_storage->offset / ATOMIC_COUNTER_SIZE; 6610 ureg_DECL_hw_atomic(ureg, base, base + ainfo->size - 1, ainfo->binding, 6611 ainfo->array_id); 6612 } 6613 } 6614 6615 assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks); 6616 for (i = 0; i < prog->info.num_ssbos; i++) { 6617 unsigned index = i; 6618 if (!st_context(ctx)->has_hw_atomics) 6619 index += frag_const->MaxAtomicBuffers; 6620 6621 t->buffers[index] = ureg_DECL_buffer(ureg, index, false); 6622 } 6623 } 6624 6625 if (program->use_shared_memory) 6626 t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED); 6627 6628 for (i = 0; i < program->shader->Program->info.num_images; i++) { 6629 if (program->images_used & (1 << i)) { 6630 t->images[i] = ureg_DECL_image(ureg, i, 6631 program->image_targets[i], 6632 program->image_formats[i], 6633 true, false); 6634 } 6635 } 6636 6637 /* Emit each instruction in turn: 6638 */ 6639 foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) 6640 compile_tgsi_instruction(t, inst); 6641 6642 /* Set the next shader stage hint for VS and TES. 
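* Drivers can use the hint to pick an output layout that suits the next
* stage; separable programs skip it since the consumer is unknown at
* link time.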
*/ 6643 switch (procType) { 6644 case PIPE_SHADER_VERTEX: 6645 case PIPE_SHADER_TESS_EVAL: 6646 if (program->shader_program->SeparateShader) 6647 break; 6648 6649 for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) { 6650 if (program->shader_program->_LinkedShaders[i]) { 6651 ureg_set_next_shader_processor( 6652 ureg, pipe_shader_type_from_mesa((gl_shader_stage)i)); 6653 break; 6654 } 6655 } 6656 break; 6657 } 6658 6659 out: 6660 if (t) { 6661 free(t->arrays); 6662 free(t->temps); 6663 free(t->constants); 6664 t->num_constants = 0; 6665 free(t->immediates); 6666 t->num_immediates = 0; 6667 FREE(t); 6668 } 6669 6670 return ret; 6671 } 6672 /* ----------------------------- End TGSI code ------------------------------ */ 6673 6674 6675 /** 6676 * Convert a shader's GLSL IR into a Mesa gl_program, although without 6677 * generating Mesa IR. 6678 */ 6679 static struct gl_program * 6680 get_mesa_program_tgsi(struct gl_context *ctx, 6681 struct gl_shader_program *shader_program, 6682 struct gl_linked_shader *shader) 6683 { 6684 glsl_to_tgsi_visitor* v; 6685 struct gl_program *prog; 6686 struct gl_shader_compiler_options *options = 6687 &ctx->Const.ShaderCompilerOptions[shader->Stage]; 6688 struct pipe_screen *pscreen = ctx->st->pipe->screen; 6689 enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(shader->Stage); 6690 unsigned skip_merge_registers; 6691 6692 validate_ir_tree(shader->ir); 6693 6694 prog = shader->Program; 6695 6696 prog->Parameters = _mesa_new_parameter_list(); 6697 v = new glsl_to_tgsi_visitor(); 6698 v->ctx = ctx; 6699 v->prog = prog; 6700 v->shader_program = shader_program; 6701 v->shader = shader; 6702 v->options = options; 6703 v->native_integers = ctx->Const.NativeIntegers; 6704 6705 v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget, 6706 PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); 6707 v->have_fma = pscreen->get_shader_param(pscreen, ptarget, 6708 PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED); 6709 v->has_tex_txf_lz = pscreen->get_param(pscreen, 6710 PIPE_CAP_TGSI_TEX_TXF_LZ); 6711 v->need_uarl = !pscreen->get_param(pscreen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS); 6712 6713 v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer, 6714 _mesa_key_pointer_equal); 6715 skip_merge_registers = 6716 pscreen->get_shader_param(pscreen, ptarget, 6717 PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS); 6718 6719 _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader, 6720 prog->Parameters); 6721 6722 /* Remove reads from output registers. */ 6723 if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS)) 6724 lower_output_reads(shader->Stage, shader->ir); 6725 6726 /* Emit intermediate IR for main(). */ 6727 visit_exec_list(shader->ir, v); 6728 6729 #if 0 6730 /* Print out some information (for debugging purposes) used by the 6731 * optimization passes. 
*/
6732 {
6733 int i;
6734 int *first_writes = ralloc_array(v->mem_ctx, int, v->next_temp);
6735 int *first_reads = ralloc_array(v->mem_ctx, int, v->next_temp);
6736 int *last_writes = ralloc_array(v->mem_ctx, int, v->next_temp);
6737 int *last_reads = ralloc_array(v->mem_ctx, int, v->next_temp);
6738
6739 for (i = 0; i < v->next_temp; i++) {
6740 first_writes[i] = -1;
6741 first_reads[i] = -1;
6742 last_writes[i] = -1;
6743 last_reads[i] = -1;
6744 }
6745 v->get_first_temp_read(first_reads);
6746 v->get_last_temp_read_first_temp_write(last_reads, first_writes);
6747 v->get_last_temp_write(last_writes);
6748 for (i = 0; i < v->next_temp; i++)
6749 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
6750 first_writes[i],
6751 last_reads[i],
6752 last_writes[i]);
6753 ralloc_free(first_writes);
6754 ralloc_free(first_reads);
6755 ralloc_free(last_writes);
6756 ralloc_free(last_reads);
6757 }
6758 #endif
6759
6760 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
6761 v->simplify_cmp();
6762 v->copy_propagate();
6763
6764 while (v->eliminate_dead_code());
6765
6766 v->merge_two_dsts();
6767 if (!skip_merge_registers)
6768 v->merge_registers();
6769 v->renumber_registers();
6770
6771 /* Write the END instruction. */
6772 v->emit_asm(NULL, TGSI_OPCODE_END);
6773
6774 if (ctx->_Shader->Flags & GLSL_DUMP) {
6775 _mesa_log("\n");
6776 _mesa_log("GLSL IR for linked %s program %d:\n",
6777 _mesa_shader_stage_to_string(shader->Stage),
6778 shader_program->Name);
6779 _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
6780 _mesa_log("\n\n");
6781 }
6782
6783 do_set_program_inouts(shader->ir, prog, shader->Stage);
6784 _mesa_copy_linked_program_data(shader_program, shader);
6785 shrink_array_declarations(v->inputs, v->num_inputs,
6786 &prog->info.inputs_read,
6787 prog->info.double_inputs_read,
6788 &prog->info.patch_inputs_read);
6789 shrink_array_declarations(v->outputs, v->num_outputs,
6790 &prog->info.outputs_written, 0ULL,
6791 &prog->info.patch_outputs_written);
6792 count_resources(v, prog);
6793
6794 /* The GLSL IR won't be needed anymore. */
6795 ralloc_free(shader->ir);
6796 shader->ir = NULL;
6797
6798 /* This must be done before the uniform storage is associated. */
6799 if (shader->Stage == MESA_SHADER_FRAGMENT &&
6800 (prog->info.inputs_read & VARYING_BIT_POS ||
6801 prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) {
6802 static const gl_state_index wposTransformState[STATE_LENGTH] = {
6803 STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
6804 };
6805
6806 v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters,
6807 wposTransformState);
6808 }
6809
6810 /* Avoid reallocation of the program parameter list, because the uniform
6811 * storage is only associated with the original parameter list.
6812 * This should be enough for Bitmap and DrawPixels constants.
6813 */
6814 _mesa_reserve_parameter_storage(prog->Parameters, 8);
6815
6816 /* This has to be done last. Any operation that can cause
6817 * prog->ParameterValues to get reallocated (e.g., anything that adds a
6818 * program constant) has to happen before creating this linkage.
6819 */ 6820 _mesa_associate_uniform_storage(ctx, shader_program, prog, true); 6821 if (!shader_program->data->LinkStatus) { 6822 free_glsl_to_tgsi_visitor(v); 6823 _mesa_reference_program(ctx, &shader->Program, NULL); 6824 return NULL; 6825 } 6826 6827 struct st_vertex_program *stvp; 6828 struct st_fragment_program *stfp; 6829 struct st_common_program *stp; 6830 struct st_compute_program *stcp; 6831 6832 switch (shader->Stage) { 6833 case MESA_SHADER_VERTEX: 6834 stvp = (struct st_vertex_program *)prog; 6835 stvp->glsl_to_tgsi = v; 6836 break; 6837 case MESA_SHADER_FRAGMENT: 6838 stfp = (struct st_fragment_program *)prog; 6839 stfp->glsl_to_tgsi = v; 6840 break; 6841 case MESA_SHADER_TESS_CTRL: 6842 case MESA_SHADER_TESS_EVAL: 6843 case MESA_SHADER_GEOMETRY: 6844 stp = st_common_program(prog); 6845 stp->glsl_to_tgsi = v; 6846 break; 6847 case MESA_SHADER_COMPUTE: 6848 stcp = (struct st_compute_program *)prog; 6849 stcp->glsl_to_tgsi = v; 6850 break; 6851 default: 6852 assert(!"should not be reached"); 6853 return NULL; 6854 } 6855 6856 return prog; 6857 } 6858 6859 /* See if there are unsupported control flow statements. */ 6860 class ir_control_flow_info_visitor : public ir_hierarchical_visitor { 6861 private: 6862 const struct gl_shader_compiler_options *options; 6863 public: 6864 ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options) 6865 : options(options), 6866 unsupported(false) 6867 { 6868 } 6869 6870 virtual ir_visitor_status visit_enter(ir_function *ir) 6871 { 6872 /* Other functions are skipped (same as glsl_to_tgsi). */ 6873 if (strcmp(ir->name, "main") == 0) 6874 return visit_continue; 6875 6876 return visit_continue_with_parent; 6877 } 6878 6879 virtual ir_visitor_status visit_enter(ir_call *ir) 6880 { 6881 if (!ir->callee->is_intrinsic()) { 6882 unsupported = true; /* it's a function call */ 6883 return visit_stop; 6884 } 6885 return visit_continue; 6886 } 6887 6888 virtual ir_visitor_status visit_enter(ir_return *ir) 6889 { 6890 if (options->EmitNoMainReturn) { 6891 unsupported = true; 6892 return visit_stop; 6893 } 6894 return visit_continue; 6895 } 6896 6897 bool unsupported; 6898 }; 6899 6900 static bool 6901 has_unsupported_control_flow(exec_list *ir, 6902 const struct gl_shader_compiler_options *options) 6903 { 6904 ir_control_flow_info_visitor visitor(options); 6905 visit_list_elements(&visitor, ir); 6906 return visitor.unsupported; 6907 } 6908 6909 extern "C" { 6910 6911 /** 6912 * Link a shader. 6913 * Called via ctx->Driver.LinkShader() 6914 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 6915 * with code lowering and other optimizations. 
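*
* Each linked stage is first run through the GLSL IR lowering passes that
* the driver's shader caps ask for, then handed to get_mesa_program_tgsi()
* (or the whole program is diverted to st_link_nir() when the driver
* prefers NIR).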
6916 */ 6917 GLboolean 6918 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 6919 { 6920 /* Return early if we are loading the shader from on-disk cache */ 6921 if (st_load_tgsi_from_disk_cache(ctx, prog)) { 6922 return GL_TRUE; 6923 } 6924 6925 struct pipe_screen *pscreen = ctx->st->pipe->screen; 6926 assert(prog->data->LinkStatus); 6927 6928 bool use_nir = false; 6929 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 6930 if (prog->_LinkedShaders[i] == NULL) 6931 continue; 6932 6933 struct gl_linked_shader *shader = prog->_LinkedShaders[i]; 6934 exec_list *ir = shader->ir; 6935 gl_shader_stage stage = shader->Stage; 6936 const struct gl_shader_compiler_options *options = 6937 &ctx->Const.ShaderCompilerOptions[stage]; 6938 enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage); 6939 bool have_dround = pscreen->get_shader_param(pscreen, ptarget, 6940 PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED); 6941 bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget, 6942 PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED); 6943 bool have_ldexp = pscreen->get_shader_param(pscreen, ptarget, 6944 PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED); 6945 unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget, 6946 PIPE_SHADER_CAP_LOWER_IF_THRESHOLD); 6947 6948 enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir) 6949 pscreen->get_shader_param(pscreen, ptarget, 6950 PIPE_SHADER_CAP_PREFERRED_IR); 6951 if (preferred_ir == PIPE_SHADER_IR_NIR) 6952 use_nir = true; 6953 6954 /* If there are forms of indirect addressing that the driver 6955 * cannot handle, perform the lowering pass. 6956 */ 6957 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput || 6958 options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) { 6959 lower_variable_index_to_cond_assign(stage, ir, 6960 options->EmitNoIndirectInput, 6961 options->EmitNoIndirectOutput, 6962 options->EmitNoIndirectTemp, 6963 options->EmitNoIndirectUniform); 6964 } 6965 6966 if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD)) 6967 lower_64bit_integer_instructions(ir, DIV64 | MOD64); 6968 6969 if (ctx->Extensions.ARB_shading_language_packing) { 6970 unsigned lower_inst = LOWER_PACK_SNORM_2x16 | 6971 LOWER_UNPACK_SNORM_2x16 | 6972 LOWER_PACK_UNORM_2x16 | 6973 LOWER_UNPACK_UNORM_2x16 | 6974 LOWER_PACK_SNORM_4x8 | 6975 LOWER_UNPACK_SNORM_4x8 | 6976 LOWER_UNPACK_UNORM_4x8 | 6977 LOWER_PACK_UNORM_4x8; 6978 6979 if (ctx->Extensions.ARB_gpu_shader5) 6980 lower_inst |= LOWER_PACK_USE_BFI | 6981 LOWER_PACK_USE_BFE; 6982 if (!ctx->st->has_half_float_packing) 6983 lower_inst |= LOWER_PACK_HALF_2x16 | 6984 LOWER_UNPACK_HALF_2x16; 6985 6986 lower_packing_builtins(ir, lower_inst); 6987 } 6988 6989 if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS)) 6990 lower_offset_arrays(ir); 6991 do_mat_op_to_vec(ir); 6992 6993 if (stage == MESA_SHADER_FRAGMENT) 6994 lower_blend_equation_advanced(shader); 6995 6996 lower_instructions(ir, 6997 MOD_TO_FLOOR | 6998 FDIV_TO_MUL_RCP | 6999 EXP_TO_EXP2 | 7000 LOG_TO_LOG2 | 7001 (have_ldexp ? 0 : LDEXP_TO_ARITH) | 7002 (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) | 7003 CARRY_TO_ARITH | 7004 BORROW_TO_ARITH | 7005 (have_dround ? 0 : DOPS_TO_DFRAC) | 7006 (options->EmitNoPow ? POW_TO_EXP2 : 0) | 7007 (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) | 7008 (options->EmitNoSat ? SAT_TO_CLAMP : 0) | 7009 (ctx->Const.ForceGLSLAbsSqrt ? 
SQRT_TO_ABS_SQRT : 0) |
7010 /* Assume that if ARB_gpu_shader5 is not supported
7011 * then all of the extended integer functions need
7012 * lowering. It may be necessary to add some caps
7013 * for individual instructions.
7014 */
7015 (!ctx->Extensions.ARB_gpu_shader5
7016 ? BIT_COUNT_TO_MATH |
7017 EXTRACT_TO_SHIFTS |
7018 INSERT_TO_SHIFTS |
7019 REVERSE_TO_SHIFTS |
7020 FIND_LSB_TO_FLOAT_CAST |
7021 FIND_MSB_TO_FLOAT_CAST |
7022 IMUL_HIGH_TO_MUL
7023 : 0));
7024
7025 do_vec_index_to_cond_assign(ir);
7026 lower_vector_insert(ir, true);
7027 lower_quadop_vector(ir, false);
7028 lower_noise(ir);
7029 if (options->MaxIfDepth == 0) {
7030 lower_discard(ir);
7031 }
7032
7033 if (ctx->Const.GLSLOptimizeConservatively) {
7034 /* Do it once and repeat only if there's unsupported control flow. */
7035 do {
7036 do_common_optimization(ir, true, true, options,
7037 ctx->Const.NativeIntegers);
7038 lower_if_to_cond_assign((gl_shader_stage)i, ir,
7039 options->MaxIfDepth, if_threshold);
7040 } while (has_unsupported_control_flow(ir, options));
7041 } else {
7042 /* Repeat it until it stops making changes. */
7043 bool progress;
7044 do {
7045 progress = do_common_optimization(ir, true, true, options,
7046 ctx->Const.NativeIntegers);
7047 progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir,
7048 options->MaxIfDepth, if_threshold);
7049 } while (progress);
7050 }
7051
7052 /* Do this again to lower ir_binop_vector_extract introduced
7053 * by optimization passes.
7054 */
7055 do_vec_index_to_cond_assign(ir);
7056
7057 validate_ir_tree(ir);
7058 }
7059
7060 build_program_resource_list(ctx, prog);
7061
7062 if (use_nir)
7063 return st_link_nir(ctx, prog);
7064
7065 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
7066 struct gl_linked_shader *shader = prog->_LinkedShaders[i];
7067 if (shader == NULL)
7068 continue;
7069
7070 struct gl_program *linked_prog =
7071 get_mesa_program_tgsi(ctx, prog, shader);
7072
7073 if (linked_prog) {
7074 st_set_prog_affected_state_flags(linked_prog);
7075 if (!ctx->Driver.ProgramStringNotify(ctx,
7076 _mesa_shader_stage_to_program(i),
7077 linked_prog)) {
7078 _mesa_reference_program(ctx, &shader->Program, NULL);
7079 return GL_FALSE;
7080 }
7081 }
7082 }
7083
7084 return GL_TRUE;
7085 }
7086
7087 void
7088 st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
7089 const ubyte outputMapping[],
7090 struct pipe_stream_output_info *so)
7091 {
7092 if (!glsl_to_tgsi->shader_program->last_vert_prog)
7093 return;
7094
7095 struct gl_transform_feedback_info *info =
7096 glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback;
7097 st_translate_stream_output_info2(info, outputMapping, so);
7098 }
7099
7100 void
7101 st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
7102 const ubyte outputMapping[],
7103 struct pipe_stream_output_info *so)
7104 {
7105 unsigned i;
7106
7107 for (i = 0; i < info->NumOutputs; i++) {
7108 so->output[i].register_index =
7109 outputMapping[info->Outputs[i].OutputRegister];
7110 so->output[i].start_component = info->Outputs[i].ComponentOffset;
7111 so->output[i].num_components = info->Outputs[i].NumComponents;
7112 so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
7113 so->output[i].dst_offset = info->Outputs[i].DstOffset;
7114 so->output[i].stream = info->Outputs[i].StreamId;
7115 }
7116
7117 for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
7118 so->stride[i] = info->Buffers[i].Stride;
7119 }
7120 so->num_outputs = info->NumOutputs;
7121 }
7122
7123 } /* extern "C" */
7124