1 /* 2 * Copyright 2010 Jerome Glisse <glisse (at) freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 #include "r600_sq.h" 24 #include "r600_formats.h" 25 #include "r600_opcodes.h" 26 #include "r600_shader.h" 27 #include "r600d.h" 28 29 #include "sb/sb_public.h" 30 31 #include "pipe/p_shader_tokens.h" 32 #include "tgsi/tgsi_info.h" 33 #include "tgsi/tgsi_parse.h" 34 #include "tgsi/tgsi_scan.h" 35 #include "tgsi/tgsi_dump.h" 36 #include "util/u_bitcast.h" 37 #include "util/u_memory.h" 38 #include "util/u_math.h" 39 #include <stdio.h> 40 #include <errno.h> 41 42 /* CAYMAN notes 43 Why CAYMAN got loops for lots of instructions is explained here. 44 45 -These 8xx t-slot only ops are implemented in all vector slots. 
46 MUL_LIT, FLT_TO_UINT, INT_TO_FLT, UINT_TO_FLT 47 These 8xx t-slot only opcodes become vector ops, with all four 48 slots expecting the arguments on sources a and b. Result is 49 broadcast to all channels. 50 MULLO_INT, MULHI_INT, MULLO_UINT, MULHI_UINT, MUL_64 51 These 8xx t-slot only opcodes become vector ops in the z, y, and 52 x slots. 53 EXP_IEEE, LOG_IEEE/CLAMPED, RECIP_IEEE/CLAMPED/FF/INT/UINT/_64/CLAMPED_64 54 RECIPSQRT_IEEE/CLAMPED/FF/_64/CLAMPED_64 55 SQRT_IEEE/_64 56 SIN/COS 57 The w slot may have an independent co-issued operation, or if the 58 result is required to be in the w slot, the opcode above may be 59 issued in the w slot as well. 60 The compiler must issue the source argument to slots z, y, and x 61 */ 62 63 /* Contents of r0 on entry to various shaders 64 65 VS - .x = VertexID 66 .y = RelVertexID (??) 67 .w = InstanceID 68 69 GS - r0.xyw, r1.xyz = per-vertex offsets 70 r0.z = PrimitiveID 71 72 TCS - .x = PatchID 73 .y = RelPatchID (??) 74 .z = InvocationID 75 .w = tess factor base. 76 77 TES - .x = TessCoord.x 78 - .y = TessCoord.y 79 - .z = RelPatchID (??) 
80 - .w = PrimitiveID 81 82 PS - face_gpr.z = SampleMask 83 face_gpr.w = SampleID 84 */ 85 #define R600_SHADER_BUFFER_INFO_SEL (512 + R600_BUFFER_INFO_OFFSET / 16) 86 static int r600_shader_from_tgsi(struct r600_context *rctx, 87 struct r600_pipe_shader *pipeshader, 88 union r600_shader_key key); 89 90 static void r600_add_gpr_array(struct r600_shader *ps, int start_gpr, 91 int size, unsigned comp_mask) { 92 93 if (!size) 94 return; 95 96 if (ps->num_arrays == ps->max_arrays) { 97 ps->max_arrays += 64; 98 ps->arrays = realloc(ps->arrays, ps->max_arrays * 99 sizeof(struct r600_shader_array)); 100 } 101 102 int n = ps->num_arrays; 103 ++ps->num_arrays; 104 105 ps->arrays[n].comp_mask = comp_mask; 106 ps->arrays[n].gpr_start = start_gpr; 107 ps->arrays[n].gpr_count = size; 108 } 109 110 static void r600_dump_streamout(struct pipe_stream_output_info *so) 111 { 112 unsigned i; 113 114 fprintf(stderr, "STREAMOUT\n"); 115 for (i = 0; i < so->num_outputs; i++) { 116 unsigned mask = ((1 << so->output[i].num_components) - 1) << 117 so->output[i].start_component; 118 fprintf(stderr, " %i: MEM_STREAM%d_BUF%i[%i..%i] <- OUT[%i].%s%s%s%s%s\n", 119 i, 120 so->output[i].stream, 121 so->output[i].output_buffer, 122 so->output[i].dst_offset, so->output[i].dst_offset + so->output[i].num_components - 1, 123 so->output[i].register_index, 124 mask & 1 ? "x" : "", 125 mask & 2 ? "y" : "", 126 mask & 4 ? "z" : "", 127 mask & 8 ? "w" : "", 128 so->output[i].dst_offset < so->output[i].start_component ? 
			" (will lower)" : "");
	}
}

/* Upload the built bytecode into a GPU buffer (shader->bo), byte-swapping
 * to little-endian on big-endian hosts.  No-op if the buffer already
 * exists.  Returns 0 or -ENOMEM. */
static int store_shader(struct pipe_context *ctx,
			struct r600_pipe_shader *shader)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	uint32_t *ptr, i;

	if (shader->bo == NULL) {
		shader->bo = (struct r600_resource*)
			pipe_buffer_create(ctx->screen, 0, PIPE_USAGE_IMMUTABLE, shader->shader.bc.ndw * 4);
		if (shader->bo == NULL) {
			return -ENOMEM;
		}
		ptr = r600_buffer_map_sync_with_rings(&rctx->b, shader->bo, PIPE_TRANSFER_WRITE);
		if (R600_BIG_ENDIAN) {
			for (i = 0; i < shader->shader.bc.ndw; ++i) {
				ptr[i] = util_cpu_to_le32(shader->shader.bc.bytecode[i]);
			}
		} else {
			memcpy(ptr, shader->shader.bc.bytecode, shader->shader.bc.ndw * sizeof(*ptr));
		}
		rctx->b.ws->buffer_unmap(shader->bo->buf);
	}

	return 0;
}

/* Full compile path for one shader variant: translate TGSI to r600
 * bytecode, optionally run the SB optimizer, upload the result and build
 * the per-stage hardware state atoms.  On any failure the partially built
 * shader is destroyed before returning the error. */
int r600_pipe_shader_create(struct pipe_context *ctx,
			    struct r600_pipe_shader *shader,
			    union r600_shader_key key)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_pipe_shader_selector *sel = shader->selector;
	int r;
	bool dump = r600_can_dump_shader(&rctx->screen->b,
					 tgsi_get_processor_type(sel->tokens));
	unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
	unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
	unsigned export_shader;

	shader->shader.bc.isa = rctx->isa;

	if (dump) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		tgsi_dump(sel->tokens, 0);

		if (sel->so.num_outputs) {
			r600_dump_streamout(&sel->so);
		}
	}
	r = r600_shader_from_tgsi(rctx, shader, key);
	if (r) {
		R600_ERR("translation from TGSI failed !\n");
		goto error;
	}
	if (shader->shader.processor_type == PIPE_SHADER_VERTEX) {
		/* only disable for vertex shaders in tess paths */
		if (key.vs.as_ls)
			use_sb = 0;
	}
	/* SB is not used for the tessellation stages. */
	use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_CTRL);
	use_sb &= (shader->shader.processor_type != PIPE_SHADER_TESS_EVAL);

	/* disable SB for shaders using doubles */
	use_sb &= !shader->shader.uses_doubles;

	/* Check if the bytecode has already been built. */
	if (!shader->shader.bc.bytecode) {
		r = r600_bytecode_build(&shader->shader.bc);
		if (r) {
			R600_ERR("building bytecode failed !\n");
			goto error;
		}
	}

	if (dump && !sb_disasm) {
		fprintf(stderr, "--------------------------------------------------------------\n");
		r600_bytecode_disasm(&shader->shader.bc);
		fprintf(stderr, "______________________________________________________________\n");
	} else if ((dump && sb_disasm) || use_sb) {
		/* SB does the disassembly itself when dumping. */
		r = r600_sb_bytecode_process(rctx, &shader->shader.bc, &shader->shader,
					     dump, use_sb);
		if (r) {
			R600_ERR("r600_sb_bytecode_process failed !\n");
			goto error;
		}
	}

	if (shader->gs_copy_shader) {
		if (dump) {
			// dump copy shader
			r = r600_sb_bytecode_process(rctx, &shader->gs_copy_shader->shader.bc,
						     &shader->gs_copy_shader->shader, dump, 0);
			if (r)
				goto error;
		}

		if ((r = store_shader(ctx, shader->gs_copy_shader)))
			goto error;
	}

	/* Store the shader in a buffer. */
	if ((r = store_shader(ctx, shader)))
		goto error;

	/* Build state. */
	switch (shader->shader.processor_type) {
	case PIPE_SHADER_TESS_CTRL:
		evergreen_update_hs_state(ctx, shader);
		break;
	case PIPE_SHADER_TESS_EVAL:
		if (key.tes.as_es)
			evergreen_update_es_state(ctx, shader);
		else
			evergreen_update_vs_state(ctx, shader);
		break;
	case PIPE_SHADER_GEOMETRY:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_gs_state(ctx, shader);
			evergreen_update_vs_state(ctx, shader->gs_copy_shader);
		} else {
			r600_update_gs_state(ctx, shader);
			r600_update_vs_state(ctx, shader->gs_copy_shader);
		}
		break;
	case PIPE_SHADER_VERTEX:
		export_shader = key.vs.as_es;
		if (rctx->b.chip_class >= EVERGREEN) {
			if (key.vs.as_ls)
				evergreen_update_ls_state(ctx, shader);
			else if (key.vs.as_es)
				evergreen_update_es_state(ctx, shader);
			else
				evergreen_update_vs_state(ctx, shader);
		} else {
			if (export_shader)
				r600_update_es_state(ctx, shader);
			else
				r600_update_vs_state(ctx, shader);
		}
		break;
	case PIPE_SHADER_FRAGMENT:
		if (rctx->b.chip_class >= EVERGREEN) {
			evergreen_update_ps_state(ctx, shader);
		} else {
			r600_update_ps_state(ctx, shader);
		}
		break;
	default:
		r = -EINVAL;
		goto error;
	}
	return 0;

error:
	r600_pipe_shader_destroy(ctx, shader);
	return r;
}

/* Release the GPU buffer, bytecode and command buffer owned by a shader. */
void r600_pipe_shader_destroy(struct pipe_context *ctx, struct r600_pipe_shader *shader)
{
	r600_resource_reference(&shader->bo, NULL);
	r600_bytecode_clear(&shader->shader.bc);
	r600_release_command_buffer(&shader->command_buffer);
}

/*
 * tgsi -> r600 shader
 */
struct r600_shader_tgsi_instruction;

/* One TGSI source operand resolved to ALU-source form (selector, per-channel
 * swizzle, modifiers, constant-buffer bank, or inline literal values). */
struct r600_shader_src {
	unsigned sel;
	unsigned swizzle[4];
	unsigned neg;
	unsigned abs;
	unsigned rel;
	unsigned kc_bank;
	boolean kc_rel; /* true if cache bank is indexed */
	uint32_t value[4];
};

/* One evergreen barycentric interpolator: whether any input uses it and
 * which ij register pair it was assigned. */
struct eg_interp {
	boolean enabled;
	unsigned ij_index;
};

/* Per-compilation state for the TGSI -> r600 bytecode translator. */
struct r600_shader_ctx {
	struct tgsi_shader_info info;
	struct tgsi_parse_context parse;
	const struct tgsi_token *tokens;
	unsigned type;
	/* first GPR assigned to each TGSI register file */
	unsigned file_offset[TGSI_FILE_COUNT];
	unsigned temp_reg;
	const struct r600_shader_tgsi_instruction *inst_info;
	struct r600_bytecode *bc;
	struct r600_shader *shader;
	struct r600_shader_src src[4];
	uint32_t *literals;
	uint32_t nliterals;
	uint32_t max_driver_temp_used;
	/* needed for evergreen interpolation */
	struct eg_interp eg_interpolators[6]; // indexed by Persp/Linear * 3 + sample/center/centroid
	/* evergreen/cayman also store sample mask in face register */
	int face_gpr;
	/* sample id is .w component stored in fixed point position register */
	int fixed_pt_position_gpr;
	int colors_used;
	boolean clip_vertex_write;
	unsigned cv_output;
	unsigned edgeflag_output;
	int fragcoord_input;
	int native_integers;
	int next_ring_offset;
	int gs_out_ring_offset;
	int gs_next_vertex;
	struct r600_shader *gs_for_vs;
	int gs_export_gpr_tregs[4];
	const struct pipe_stream_output_info *gs_stream_output_info;
	unsigned enabled_stream_buffers_mask;
	unsigned tess_input_info; /* temp with tess input offsets */
	unsigned tess_output_info; /* temp with tess input offsets */
};

/* Table entry mapping one TGSI opcode to its emit callback. */
struct r600_shader_tgsi_instruction {
	unsigned op;
	int (*process)(struct r600_shader_ctx *ctx);
};

static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind);
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[], eg_shader_tgsi_instruction[], cm_shader_tgsi_instruction[];
static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx);
static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason);
static void fc_pushlevel(struct r600_shader_ctx *ctx, int type);
static int tgsi_else(struct r600_shader_ctx *ctx);
static int tgsi_endif(struct r600_shader_ctx *ctx);
static int tgsi_bgnloop(struct r600_shader_ctx *ctx);
static int tgsi_endloop(struct r600_shader_ctx *ctx);
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx);
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
				unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
				unsigned int dst_reg);
static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src,
			const struct r600_shader_src *shader_src,
			unsigned chan);
static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
			       unsigned dst_reg);

/* Return the index of the highest component set in a TGSI writemask. */
static int tgsi_last_instruction(unsigned writemask)
{
	int i, lasti = 0;

	for (i = 0; i < 4; i++) {
		if (writemask & (1 << i)) {
			lasti = i;
		}
	}
	return lasti;
}

/* Reject TGSI constructs the translator cannot handle: multiple dst
 * registers (except DFRACEXP), predicates, and 2D-indexed register files
 * outside the stage-specific cases allowed below. */
static int tgsi_is_supported(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *i = &ctx->parse.FullToken.FullInstruction;
	unsigned j;

	if (i->Instruction.NumDstRegs > 1 && i->Instruction.Opcode != TGSI_OPCODE_DFRACEXP) {
		R600_ERR("too many dst (%d)\n", i->Instruction.NumDstRegs);
		return -EINVAL;
	}
	if (i->Instruction.Predicate) {
		R600_ERR("predicate unsupported\n");
		return -EINVAL;
	}
#if 0
	if (i->Instruction.Label) {
		R600_ERR("label unsupported\n");
		return -EINVAL;
	}
#endif
	for (j = 0; j < i->Instruction.NumSrcRegs; j++) {
		if (i->Src[j].Register.Dimension) {
			switch (i->Src[j].Register.File) {
			case TGSI_FILE_CONSTANT:
				break;
			case TGSI_FILE_INPUT:
				if (ctx->type == PIPE_SHADER_GEOMETRY ||
				    ctx->type == PIPE_SHADER_TESS_CTRL ||
				    ctx->type == PIPE_SHADER_TESS_EVAL)
					break;
				/* fallthrough: unsupported for other stages */
			case TGSI_FILE_OUTPUT:
				if (ctx->type == PIPE_SHADER_TESS_CTRL)
					break;
				/* fallthrough: unsupported for other stages */
			default:
				R600_ERR("unsupported src %d (file %d, dimension %d)\n", j,
					 i->Src[j].Register.File,
					 i->Src[j].Register.Dimension);
				return -EINVAL;
			}
		}
	}
	for (j = 0; j < i->Instruction.NumDstRegs; j++) {
		if (i->Dst[j].Register.Dimension) {
			if (ctx->type == PIPE_SHADER_TESS_CTRL)
				continue;
			R600_ERR("unsupported dst (dimension)\n");
			return -EINVAL;
		}
	}
	return 0;
}

/* Map (interpolate mode, location) to an eg_interpolators[] slot:
 * is_linear * 3 + loc, where loc is sample=0 / center=1 / centroid=2.
 * Returns -1 for non-interpolated (constant/flat) modes. */
int eg_get_interpolator_index(unsigned interpolate, unsigned location)
{
	if (interpolate == TGSI_INTERPOLATE_COLOR ||
	    interpolate == TGSI_INTERPOLATE_LINEAR ||
	    interpolate == TGSI_INTERPOLATE_PERSPECTIVE)
	{
		int is_linear = interpolate == TGSI_INTERPOLATE_LINEAR;
		int loc;

		switch(location) {
		case TGSI_INTERPOLATE_LOC_CENTER:
			loc = 1;
			break;
		case TGSI_INTERPOLATE_LOC_CENTROID:
			loc = 2;
			break;
		case TGSI_INTERPOLATE_LOC_SAMPLE:
		default:
			loc = 0; break;
		}

		return is_linear * 3 + loc;
	}

	return -1;
}

/* Copy the ij barycentric index assigned to this input's interpolator. */
static void evergreen_interp_assign_ij_index(struct r600_shader_ctx *ctx,
					     int input)
{
	int i = eg_get_interpolator_index(
		ctx->shader->input[input].interpolate,
		ctx->shader->input[input].interpolate_location);
	assert(i >= 0);
	ctx->shader->input[input].ij_index = ctx->eg_interpolators[i].ij_index;
}

/* Emit the INTERP_ZW / INTERP_XY instruction pairs that interpolate one
 * fragment-shader input into its GPR.  Eight ALU ops are emitted (two
 * groups of four); only the middle four write the destination. */
static int evergreen_interp_alu(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;
	int gpr = 0, base_chan = 0;
	int ij_index = ctx->shader->input[input].ij_index;

	/* work out gpr and base_chan from index */
	gpr = ij_index / 2;
	base_chan = (2 * (ij_index % 2)) + 1;

	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		if (i < 4)
			alu.op = ALU_OP2_INTERP_ZW;
		else
			alu.op = ALU_OP2_INTERP_XY;

		if ((i > 1) && (i < 6)) {
			alu.dst.sel = ctx->shader->input[input].gpr;
			alu.dst.write = 1;
		}

		alu.dst.chan = i % 4;

		alu.src[0].sel = gpr;
		alu.src[0].chan = (base_chan - (i % 2));

		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;

		alu.bank_swizzle_force = SQ_ALU_VEC_210;
		if ((i % 4) == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Load a flat (non-interpolated) fragment input via INTERP_LOAD_P0,
 * one ALU op per channel, into the input's GPR. */
static int evergreen_interp_flat(struct r600_shader_ctx *ctx, int input)
{
	int i, r;
	struct r600_bytecode_alu alu;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_INTERP_LOAD_P0;

		alu.dst.sel = ctx->shader->input[input].gpr;
		alu.dst.write = 1;

		alu.dst.chan = i;

		alu.src[0].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[0].chan = i;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/*
 * Special export handling in shaders
 *
 * shader export ARRAY_BASE for EXPORT_POS:
 * 60 is position
 * 61 is misc vector
 * 62, 63 are clip distance vectors
 *
 * The use of the values exported in 61-63 are controlled by PA_CL_VS_OUT_CNTL:
 * VS_OUT_MISC_VEC_ENA - enables the use of all fields in export 61
 * USE_VTX_POINT_SIZE - point size in the X channel of export 61
 * USE_VTX_EDGE_FLAG - edge flag in the Y channel of export 61
 * USE_VTX_RENDER_TARGET_INDX - render target index in the Z channel of export 61
 * USE_VTX_VIEWPORT_INDX - viewport index in the W channel of export 61
 * USE_VTX_KILL_FLAG - kill flag in the Z channel of export 61 (mutually
 * exclusive from render target index)
 * VS_OUT_CCDIST0_VEC_ENA/VS_OUT_CCDIST1_VEC_ENA - enable clip distance vectors
 *
 *
 * shader export ARRAY_BASE for EXPORT_PIXEL:
 * 0-7 CB targets
 * 61 computed Z vector
 *
 * The use of the values exported in the computed Z vector are controlled
 * by DB_SHADER_CONTROL:
 * Z_EXPORT_ENABLE - Z as a float in RED
 * STENCIL_REF_EXPORT_ENABLE - stencil ref as int in GREEN
 * COVERAGE_TO_MASK_ENABLE - alpha to mask in ALPHA
 * MASK_EXPORT_ENABLE - pixel sample mask in BLUE
 * DB_SOURCE_FORMAT - export control restrictions
 *
 */


/* Map name/sid pair from tgsi to the 8-bit semantic index for SPI setup */
static int r600_spi_sid(struct r600_shader_io * io)
{
	int index, name = io->name;

	/* These params are handled differently, they don't need
	 * semantic indices, so we'll use 0 for them.
	 */
	if (name == TGSI_SEMANTIC_POSITION ||
	    name == TGSI_SEMANTIC_PSIZE ||
	    name == TGSI_SEMANTIC_EDGEFLAG ||
	    name == TGSI_SEMANTIC_FACE ||
	    name == TGSI_SEMANTIC_SAMPLEMASK)
		index = 0;
	else {
		if (name == TGSI_SEMANTIC_GENERIC) {
			/* For generic params simply use sid from tgsi */
			index = io->sid;
		} else {
			/* For non-generic params - pack name and sid into 8 bits */
			index = 0x80 | (name<<3) | (io->sid);
		}

		/* Make sure that all really used indices have nonzero value, so
		 * we can just compare it to 0 later instead of comparing the name
		 * with different values to detect special cases. */
		index++;
	}

	return index;
};

/* we need this to get a common lds index for vs/tcs/tes input/outputs */
int r600_get_lds_unique_index(unsigned semantic_name, unsigned index)
{
	switch (semantic_name) {
	case TGSI_SEMANTIC_POSITION:
		return 0;
	case TGSI_SEMANTIC_PSIZE:
		return 1;
	case TGSI_SEMANTIC_CLIPDIST:
		assert(index <= 1);
		return 2 + index;
	case TGSI_SEMANTIC_GENERIC:
		if (index <= 63-4)
			return 4 + index - 9;
		else
			/* same explanation as in the default statement,
			 * the only user hitting this is st/nine.
			 */
			return 0;

	/* patch indices are completely separate and thus start from 0 */
	case TGSI_SEMANTIC_TESSOUTER:
		return 0;
	case TGSI_SEMANTIC_TESSINNER:
		return 1;
	case TGSI_SEMANTIC_PATCH:
		return 2 + index;

	default:
		/* Don't fail here. The result of this function is only used
		 * for LS, TCS, TES, and GS, where legacy GL semantics can't
		 * occur, but this function is called for all vertex shaders
		 * before it's known whether LS will be compiled or not.
		 */
		return 0;
	}
}

/* turn input into interpolate on EG */
static int evergreen_interp_input(struct r600_shader_ctx *ctx, int index)
{
	int r = 0;

	if (ctx->shader->input[index].spi_sid) {
		/* Reserve an LDS parameter slot, then emit either the
		 * barycentric interpolation or the flat P0 load. */
		ctx->shader->input[index].lds_pos = ctx->shader->nlds++;
		if (ctx->shader->input[index].interpolate > 0) {
			evergreen_interp_assign_ij_index(ctx, index);
			r = evergreen_interp_alu(ctx, index);
		} else {
			r = evergreen_interp_flat(ctx, index);
		}
	}
	return r;
}

/* Choose between the front and back color inputs with a per-channel CNDGT
 * on the face register; the selected value is written into the front GPR. */
static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back)
{
	struct r600_bytecode_alu alu;
	int i, r;
	int gpr_front = ctx->shader->input[front].gpr;
	int gpr_back = ctx->shader->input[back].gpr;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP3_CNDGT;
		alu.is_op3 = 1;
		alu.dst.write = 1;
		alu.dst.sel = gpr_front;
		alu.src[0].sel = ctx->face_gpr;
		alu.src[1].sel = gpr_front;
		alu.src[2].sel = gpr_back;

		alu.dst.chan = i;
		alu.src[1].chan = i;
		alu.src[2].chan = i;
		alu.last = (i==3);

		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;
	}

	return 0;
}

/* execute a single slot ALU calculation */
static int single_alu_op2(struct r600_shader_ctx *ctx, int op,
			  int dst_sel, int dst_chan,
			  int src0_sel, unsigned src0_chan_val,
			  int src1_sel, unsigned src1_chan_val)
{
	struct
	r600_bytecode_alu alu;
	int r, i;

	/* On Cayman, MULLO_INT is a former T-slot op implemented across the
	 * vector slots (see the CAYMAN notes at the top of this file): emit
	 * it on all four channels and write only dst_chan. */
	if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) {
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = op;
			/* chan_val carries a literal value for LITERAL
			 * selectors and a channel number otherwise. */
			alu.src[0].sel = src0_sel;
			if (src0_sel == V_SQ_ALU_SRC_LITERAL)
				alu.src[0].value = src0_chan_val;
			else
				alu.src[0].chan = src0_chan_val;
			alu.src[1].sel = src1_sel;
			if (src1_sel == V_SQ_ALU_SRC_LITERAL)
				alu.src[1].value = src1_chan_val;
			else
				alu.src[1].chan = src1_chan_val;
			alu.dst.sel = dst_sel;
			alu.dst.chan = i;
			alu.dst.write = i == dst_chan;
			alu.last = (i == 3);
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		return 0;
	}

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = op;
	alu.src[0].sel = src0_sel;
	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[0].value = src0_chan_val;
	else
		alu.src[0].chan = src0_chan_val;
	alu.src[1].sel = src1_sel;
	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[1].value = src1_chan_val;
	else
		alu.src[1].chan = src1_chan_val;
	alu.dst.sel = dst_sel;
	alu.dst.chan = dst_chan;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* execute a single slot ALU calculation */
static int single_alu_op3(struct r600_shader_ctx *ctx, int op,
			  int dst_sel, int dst_chan,
			  int src0_sel, unsigned src0_chan_val,
			  int src1_sel, unsigned src1_chan_val,
			  int src2_sel, unsigned src2_chan_val)
{
	struct r600_bytecode_alu alu;
	int r;

	/* validate this for other ops */
	assert(op == ALU_OP3_MULADD_UINT24);
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = op;
	/* chan_val is a literal value for LITERAL selectors, else a channel */
	alu.src[0].sel = src0_sel;
	if (src0_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[0].value = src0_chan_val;
	else
		alu.src[0].chan = src0_chan_val;
	alu.src[1].sel = src1_sel;
	if (src1_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[1].value = src1_chan_val;
	else
		alu.src[1].chan = src1_chan_val;
	alu.src[2].sel = src2_sel;
	if (src2_sel == V_SQ_ALU_SRC_LITERAL)
		alu.src[2].value = src2_chan_val;
	else
		alu.src[2].chan = src2_chan_val;
	alu.dst.sel = dst_sel;
	alu.dst.chan = dst_chan;
	alu.is_op3 = 1;
	/* NOTE(review): dst.write is not set here, unlike single_alu_op2 —
	 * presumably intentional for OP3, but worth confirming. */
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* put it in temp_reg.x */
static int get_lds_offset0(struct r600_shader_ctx *ctx,
			   int rel_patch_chan,
			   int temp_reg, bool is_patch_var)
{
	int r;

	/* MUL temp.x, patch_stride (input_vals.x), rel_patch_id (r0.y (tcs)) */
	/* ADD
	   Dimension - patch0_offset (input_vals.z),
	   Non-dim - patch0_data_offset (input_vals.w)
	*/
	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
			   temp_reg, 0,
			   ctx->tess_output_info, 0,
			   0, rel_patch_chan,
			   ctx->tess_output_info, is_patch_var ? 3 : 2);
	if (r)
		return r;
	return 0;
}

/* Return the GPR backing a TGSI address register: index 0 is the AR
 * register, higher indices map to the extra index registers. */
static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
{
	return index > 0 ?
		ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg;
}

/* Allocate the next free driver temporary GPR for this instruction. */
static int r600_get_temp(struct r600_shader_ctx *ctx)
{
	return ctx->temp_reg + ctx->max_driver_temp_used++;
}

/* Append a PRIMID output (r0.z, write mask .z only) to the VS outputs. */
static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
{
	int i;
	i = ctx->shader->noutput++;
	ctx->shader->output[i].name = TGSI_SEMANTIC_PRIMID;
	ctx->shader->output[i].sid = 0;
	ctx->shader->output[i].gpr = 0;
	ctx->shader->output[i].interpolate = TGSI_INTERPOLATE_CONSTANT;
	ctx->shader->output[i].write_mask = 0x4;
	ctx->shader->output[i].spi_sid = prim_id_sid;

	return 0;
}

/* Emit a bare barrier ALU op (opcode taken from the instruction table). */
static int tgsi_barrier(struct r600_shader_ctx *ctx)
{
	struct r600_bytecode_alu alu;
	int r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ctx->inst_info->op;
	alu.last = 1;

	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* Process one TGSI declaration token: record inputs/outputs (with their
 * semantic and SPI indices), register arrays, and system values; emits
 * setup code where a system value needs it. */
static int tgsi_declaration(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_declaration *d = &ctx->parse.FullToken.FullDeclaration;
	int r, i, j, count = d->Range.Last - d->Range.First + 1;

	switch (d->Declaration.File) {
	case TGSI_FILE_INPUT:
		for (j = 0; j < count; j++) {
			i = ctx->shader->ninput + j;
			assert(i < ARRAY_SIZE(ctx->shader->input));
			ctx->shader->input[i].name = d->Semantic.Name;
			ctx->shader->input[i].sid = d->Semantic.Index + j;
			ctx->shader->input[i].interpolate = d->Interp.Interpolate;
			ctx->shader->input[i].interpolate_location = d->Interp.Location;
			ctx->shader->input[i].gpr = ctx->file_offset[TGSI_FILE_INPUT] + d->Range.First + j;
			if (ctx->type == PIPE_SHADER_FRAGMENT) {
				ctx->shader->input[i].spi_sid = r600_spi_sid(&ctx->shader->input[i]);
				switch (ctx->shader->input[i].name) {
				case TGSI_SEMANTIC_FACE:
					if (ctx->face_gpr != -1)
						ctx->shader->input[i].gpr = ctx->face_gpr; /* already allocated by allocate_system_value_inputs */
					else
						ctx->face_gpr = ctx->shader->input[i].gpr;
					break;
				case TGSI_SEMANTIC_COLOR:
					ctx->colors_used++;
					break;
				case TGSI_SEMANTIC_POSITION:
					ctx->fragcoord_input = i;
					break;
				case TGSI_SEMANTIC_PRIMID:
					/* set this for now */
					ctx->shader->gs_prim_id_input = true;
					ctx->shader->ps_prim_id_input = i;
					break;
				}
				if (ctx->bc->chip_class >= EVERGREEN) {
					if ((r = evergreen_interp_input(ctx, i)))
						return r;
				}
			} else if (ctx->type == PIPE_SHADER_GEOMETRY) {
				/* FIXME probably skip inputs if they aren't passed in the ring */
				ctx->shader->input[i].ring_offset = ctx->next_ring_offset;
				ctx->next_ring_offset += 16;
				if (ctx->shader->input[i].name == TGSI_SEMANTIC_PRIMID)
					ctx->shader->gs_prim_id_input = true;
			}
		}
		ctx->shader->ninput += count;
		break;
	case TGSI_FILE_OUTPUT:
		for (j = 0; j < count; j++) {
			i = ctx->shader->noutput + j;
			assert(i < ARRAY_SIZE(ctx->shader->output));
			ctx->shader->output[i].name = d->Semantic.Name;
			ctx->shader->output[i].sid = d->Semantic.Index + j;
			ctx->shader->output[i].gpr = ctx->file_offset[TGSI_FILE_OUTPUT] + d->Range.First + j;
			ctx->shader->output[i].interpolate = d->Interp.Interpolate;
			ctx->shader->output[i].write_mask = d->Declaration.UsageMask;
			if (ctx->type == PIPE_SHADER_VERTEX ||
			    ctx->type == PIPE_SHADER_GEOMETRY ||
			    ctx->type == PIPE_SHADER_TESS_EVAL) {
				ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);
				/* Track which misc-vector fields (see the
				 * export-handling comment above) this shader
				 * writes. */
				switch (d->Semantic.Name) {
				case TGSI_SEMANTIC_CLIPDIST:
					ctx->shader->clip_dist_write |= d->Declaration.UsageMask <<
									((d->Semantic.Index + j) << 2);
					break;
				case TGSI_SEMANTIC_PSIZE:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_point_size = 1;
					break;
				case TGSI_SEMANTIC_EDGEFLAG:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_edgeflag = 1;
					ctx->edgeflag_output = i;
					break;
				case TGSI_SEMANTIC_VIEWPORT_INDEX:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_viewport = 1;
					break;
				case TGSI_SEMANTIC_LAYER:
					ctx->shader->vs_out_misc_write = 1;
					ctx->shader->vs_out_layer = 1;
					break;
				case TGSI_SEMANTIC_CLIPVERTEX:
					ctx->clip_vertex_write = TRUE;
					ctx->cv_output = i;
					break;
				}
				if (ctx->type == PIPE_SHADER_GEOMETRY) {
					ctx->gs_out_ring_offset += 16;
				}
			} else if (ctx->type == PIPE_SHADER_FRAGMENT) {
				switch (d->Semantic.Name) {
				case TGSI_SEMANTIC_COLOR:
					ctx->shader->nr_ps_max_color_exports++;
					break;
				}
			}
		}
		ctx->shader->noutput += count;
		break;
	case TGSI_FILE_TEMPORARY:
		if (ctx->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
			if (d->Array.ArrayID) {
				r600_add_gpr_array(ctx->shader,
						   ctx->file_offset[TGSI_FILE_TEMPORARY] +
						   d->Range.First,
						   d->Range.Last - d->Range.First + 1, 0x0F);
			}
		}
		break;

	case TGSI_FILE_CONSTANT:
	case TGSI_FILE_SAMPLER:
	case TGSI_FILE_SAMPLER_VIEW:
	case TGSI_FILE_ADDRESS:
		break;

	case TGSI_FILE_SYSTEM_VALUE:
		if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
		    d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
		    d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
			break; /* Already handled from allocate_system_value_inputs */
		} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
			/* InstanceID arrives in r0.w (see the comment at the
			 * top of this file); without native integers it must
			 * be converted to float in place. */
			if (!ctx->native_integers) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP1_INT_TO_FLT;
				alu.src[0].sel = 0;
				alu.src[0].chan = 3;

				alu.dst.sel = 0;
				alu.dst.chan = 3;
				alu.dst.write = 1;
				alu.last = 1;

				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
			break;
		} else if (d->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
			break;
		else if (d->Semantic.Name == TGSI_SEMANTIC_INVOCATIONID)
			break;
		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ||
			 d->Semantic.Name == TGSI_SEMANTIC_TESSOUTER) {
			/* Fetch the tess factors from LDS into fixed GPRs
			 * (inner -> r3, outer -> r2). */
			int param = r600_get_lds_unique_index(d->Semantic.Name, 0);
			int dreg = d->Semantic.Name == TGSI_SEMANTIC_TESSINNER ? 3 : 2;
			unsigned temp_reg = r600_get_temp(ctx);

			r = get_lds_offset0(ctx, 2, temp_reg, true);
			if (r)
				return r;

			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
					   temp_reg, 0,
					   temp_reg, 0,
					   V_SQ_ALU_SRC_LITERAL, param * 16);
			if (r)
				return r;

			do_lds_fetch_values(ctx, temp_reg, dreg);
		}
		else if (d->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) {
			/* Build the TessCoord in r1 from r0.xy:
			   MOV r1.x, r0.x;
			   MOV r1.y, r0.y;
			*/
			for (i = 0; i < 2; i++) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = 0;
				alu.src[0].chan = 0 + i;
				alu.dst.sel = 1;
				alu.dst.chan = 0 + i;
				alu.dst.write = 1;
				alu.last = (i == 1) ? 1 : 0;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
			/* ADD r1.z, 1.0f, -r0.x */
			struct r600_bytecode_alu alu;
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD;
			alu.src[0].sel = V_SQ_ALU_SRC_1;
			alu.src[1].sel = 1;
			alu.src[1].chan = 0;
			alu.src[1].neg = 1;
			alu.dst.sel = 1;
			alu.dst.chan = 2;
			alu.dst.write = 1;
			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* ADD r1.z, r1.z, -r1.y */
			alu.op = ALU_OP2_ADD;
			alu.src[0].sel = 1;
			alu.src[0].chan = 2;
			alu.src[1].sel = 1;
			alu.src[1].chan = 1;
			alu.src[1].neg = 1;
			alu.dst.sel = 1;
			alu.dst.chan = 2;
			alu.dst.write = 1;
			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
			break;
		}
		break;
	default:
		R600_ERR("unsupported file %d declaration\n", d->Declaration.File);
		return -EINVAL;
	}
	return 0;
}

/* Scan the shader for system values (sample mask / sample id / sample pos)
 * and interpolateAt* usage, and reserve input GPRs for them starting at
 * gpr_offset.  Returns the next free GPR. */
static int
/* Scan the TGSI token stream once for system-value declarations
 * (SAMPLEMASK, SAMPLEID/SAMPLEPOS) and for INTERP_SAMPLE (which implicitly
 * needs SAMPLEID), then allocate one shader input slot and one GPR per
 * enabled entry, starting at gpr_offset.  As a side effect, interpolators
 * used by INTERP_SAMPLE/OFFSET/CENTROID are flagged in ctx->eg_interpolators.
 * Returns gpr_offset plus the number of GPRs reserved here (0 on parse
 * failure).  The `static int` of this definition precedes this line. */
allocate_system_value_inputs(struct r600_shader_ctx *ctx, int gpr_offset)
{
	struct tgsi_parse_context parse;
	struct {
		boolean enabled;
		int *reg;			/* where the allocated GPR index is stored */
		unsigned name, alternate_name;	/* TGSI semantics that enable this slot */
	} inputs[2] = {
		{ false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives in Front Face GPR.z */

		{ false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w */
	};
	int i, k, num_regs = 0;

	if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
		return 0;
	}

	/* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
			const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID)
			{
				int interpolate, location, k;

				if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
					inputs[1].enabled = true; /* needs SAMPLEID */
				} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
					/* Needs sample positions, currently those are always available */
				} else {
					location = TGSI_INTERPOLATE_LOC_CENTROID;
				}

				interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
				k = eg_get_interpolator_index(interpolate, location);
				ctx->eg_interpolators[k].enabled = true;
			}
		} else if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_DECLARATION) {
			struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
			if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
				/* either semantic enables the same slot (e.g. SAMPLEPOS
				 * also forces the SAMPLEID input) */
				for (k = 0; k < ARRAY_SIZE(inputs); k++) {
					if (d->Semantic.Name == inputs[k].name ||
						d->Semantic.Name == inputs[k].alternate_name) {
						inputs[k].enabled = true;
					}
				}
			}
		}
	}

	tgsi_parse_free(&parse);

	for (i = 0; i < ARRAY_SIZE(inputs); i++) {
		boolean enabled = inputs[i].enabled;
		int *reg = inputs[i].reg;
		unsigned name = inputs[i].name;

		if (enabled) {
			int gpr = gpr_offset + num_regs++;

			// add to inputs, allocate a gpr
			k = ctx->shader->ninput ++;
			ctx->shader->input[k].name = name;
			ctx->shader->input[k].sid = 0;
			ctx->shader->input[k].interpolate = TGSI_INTERPOLATE_CONSTANT;
			ctx->shader->input[k].interpolate_location = TGSI_INTERPOLATE_LOC_CENTER;
			*reg = ctx->shader->input[k].gpr = gpr;
		}
	}

	return gpr_offset + num_regs;
}

/*
 * for evergreen we need to scan the shader to find the number of GPRs we need to
 * reserve for interpolation and system values
 *
 * we need to know if we are going to emit
 * any sample or centroid inputs
 * if perspective and linear are required
 */
static int evergreen_gpr_count(struct r600_shader_ctx *ctx)
{
	unsigned i;
	int num_baryc;
	struct tgsi_parse_context parse;

	memset(&ctx->eg_interpolators, 0, sizeof(ctx->eg_interpolators));

	/* Flag every interpolator required by the declared inputs. */
	for (i = 0; i < ctx->info.num_inputs; i++) {
		int k;
		/* skip position/face/mask/sampleid */
		if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_POSITION ||
		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FACE ||
		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEMASK ||
		    ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_SAMPLEID)
			continue;

		k = eg_get_interpolator_index(
			ctx->info.input_interpolate[i],
			ctx->info.input_interpolate_loc[i]);
		if (k >= 0)
			ctx->eg_interpolators[k].enabled = TRUE;
	}

	if
(tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
		return 0;
	}

	/* need to scan shader for system values and interpolateAtSample/Offset/Centroid */
	while (!tgsi_parse_end_of_tokens(&parse)) {
		tgsi_parse_token(&parse);

		if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
			const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
				inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID)
			{
				int interpolate, location, k;

				if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
				} else if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET) {
					location = TGSI_INTERPOLATE_LOC_CENTER;
				} else {
					location = TGSI_INTERPOLATE_LOC_CENTROID;
				}

				interpolate = ctx->info.input_interpolate[inst->Src[0].Register.Index];
				k = eg_get_interpolator_index(interpolate, location);
				ctx->eg_interpolators[k].enabled = true;
			}
		}
	}

	tgsi_parse_free(&parse);

	/* assign gpr to each interpolator according to priority */
	num_baryc = 0;
	for (i = 0; i < ARRAY_SIZE(ctx->eg_interpolators); i++) {
		if (ctx->eg_interpolators[i].enabled) {
			ctx->eg_interpolators[i].ij_index = num_baryc;
			num_baryc ++;
		}
	}

	/* XXX PULL MODEL and LINE STIPPLE */

	/* halve, rounding up — presumably two ij interpolant sets share one
	 * GPR; NOTE(review): confirm against the interpolation setup code */
	num_baryc = (num_baryc + 1) >> 1;
	return allocate_system_value_inputs(ctx, num_baryc);
}

/* sample_id_sel == NULL means fetch for current sample */
/* Emit a VFETCH from the buffer-info constant buffer that loads the
 * position of the selected sample into a freshly allocated temp GPR
 * (all four channels).  Returns the temp GPR index, or a negative error
 * code from the bytecode emitters. */
static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_src *sample_id, int chan_sel)
{
	struct r600_bytecode_vtx vtx;
	int r, t1;

	assert(ctx->fixed_pt_position_gpr != -1);

	t1 = r600_get_temp(ctx);

	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
	vtx.op = FETCH_OP_VFETCH;
	vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	if (sample_id == NULL) {
		vtx.src_gpr = ctx->fixed_pt_position_gpr; // SAMPLEID is in .w;
		vtx.src_sel_x = 3;
	}
	else {
		/* explicit sample index: copy it into the temp first so the
		 * fetch can index with it */
		struct r600_bytecode_alu alu;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		r600_bytecode_src(&alu.src[0], sample_id, chan_sel);
		alu.dst.sel = t1;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		vtx.src_gpr = t1;
		vtx.src_sel_x = 0;
	}
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = t1;
	vtx.dst_sel_x = 0;
	vtx.dst_sel_y = 1;
	vtx.dst_sel_z = 2;
	vtx.dst_sel_w = 3;
	vtx.data_format = FMT_32_32_32_32_FLOAT;
	vtx.num_format_all = 2;
	vtx.format_comp_all = 1;
	vtx.use_const_fields = 0;
	vtx.offset = 1; // first element is size of buffer
	vtx.endian = r600_endian_swap(32);
	vtx.srf_mode_all = 1; /* SRF_MODE_NO_ZERO */

	r = r600_bytecode_add_vtx(ctx->bc, &vtx);
	if (r)
		return r;

	return t1;
}

/* Translate a TGSI source operand into the r600 source description:
 * copies swizzle/neg/abs, then resolves the register select (sel) for
 * immediates, system values, and plain register files. */
static void tgsi_src(struct r600_shader_ctx *ctx,
		     const struct tgsi_full_src_register *tgsi_src,
		     struct r600_shader_src *r600_src)
{
	memset(r600_src, 0, sizeof(*r600_src));
	r600_src->swizzle[0] = tgsi_src->Register.SwizzleX;
	r600_src->swizzle[1] = tgsi_src->Register.SwizzleY;
	r600_src->swizzle[2] = tgsi_src->Register.SwizzleZ;
	r600_src->swizzle[3] = tgsi_src->Register.SwizzleW;
	r600_src->neg = tgsi_src->Register.Negate;
	r600_src->abs = tgsi_src->Register.Absolute;

	if (tgsi_src->Register.File == TGSI_FILE_IMMEDIATE) {
		int index;
		/* if all four channels read the same immediate channel, it may
		 * fold into a hardware special constant */
		if ((tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleY) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleZ) &&
			(tgsi_src->Register.SwizzleX == tgsi_src->Register.SwizzleW)) {

			index = tgsi_src->Register.Index * 4 + tgsi_src->Register.SwizzleX;
			/* may rewrite sel to an inline constant (and adjust neg) */
			r600_bytecode_special_constants(ctx->literals[index], &r600_src->sel, &r600_src->neg, r600_src->abs);
			if (r600_src->sel != V_SQ_ALU_SRC_LITERAL)
				return;
		}
		/* otherwise keep it as a 4-dword literal */
		index = tgsi_src->Register.Index;
		r600_src->sel = V_SQ_ALU_SRC_LITERAL;
		memcpy(r600_src->value, ctx->literals + index * 4, sizeof(r600_src->value));
	} else if (tgsi_src->Register.File == TGSI_FILE_SYSTEM_VALUE) {
		/* map each system value to the GPR/channel it was loaded into
		 * (see the r0 layout comment at the top of this file) */
		if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEMASK) {
			r600_src->swizzle[0] = 2; // Z value
			r600_src->swizzle[1] = 2;
			r600_src->swizzle[2] = 2;
			r600_src->swizzle[3] = 2;
			r600_src->sel = ctx->face_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEID) {
			r600_src->swizzle[0] = 3; // W value
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = ctx->fixed_pt_position_gpr;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_SAMPLEPOS) {
			/* xy from the fetched position, zw forced (swizzle 4) */
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 1;
			r600_src->swizzle[2] = 4;
			r600_src->swizzle[3] = 4;
			r600_src->sel = load_sample_position(ctx, NULL, -1);
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INSTANCEID) {
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTEXID) {
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 0;
			r600_src->swizzle[2] = 0;
			r600_src->swizzle[3] = 0;
			r600_src->sel = 0;
		} else if (ctx->type != PIPE_SHADER_TESS_CTRL &&
			   ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
			/* non-TCS invocation id lives in r1.w */
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
			r600_src->sel = 1;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_INVOCATIONID) {
			/* TCS (only case left): InvocationID is r0.z */
			r600_src->swizzle[0] = 2;
			r600_src->swizzle[1] = 2;
			r600_src->swizzle[2] = 2;
			r600_src->swizzle[3] = 2;
			r600_src->sel = 0;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSCOORD) {
			/* r1 was filled by the TESSCOORD declaration handler */
			r600_src->sel = 1;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSINNER) {
			r600_src->sel = 3;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_TESSOUTER) {
			r600_src->sel = 2;
		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_VERTICESIN) {
			/* vertices-in sits in a different channel of the tess
			 * input info register depending on the stage */
			if (ctx->type == PIPE_SHADER_TESS_CTRL) {
				r600_src->sel = ctx->tess_input_info;
				r600_src->swizzle[0] = 2;
				r600_src->swizzle[1] = 2;
				r600_src->swizzle[2] = 2;
				r600_src->swizzle[3] = 2;
			} else {
				r600_src->sel = ctx->tess_input_info;
				r600_src->swizzle[0] = 3;
				r600_src->swizzle[1] = 3;
				r600_src->swizzle[2] = 3;
				r600_src->swizzle[3] = 3;
			}
		} else if (ctx->type == PIPE_SHADER_TESS_CTRL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
			r600_src->sel = 0;
			r600_src->swizzle[0] = 0;
			r600_src->swizzle[1] = 0;
			r600_src->swizzle[2] = 0;
			r600_src->swizzle[3] = 0;
		} else if (ctx->type == PIPE_SHADER_TESS_EVAL && ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_PRIMID) {
			r600_src->sel = 0;
			r600_src->swizzle[0] = 3;
			r600_src->swizzle[1] = 3;
			r600_src->swizzle[2] = 3;
			r600_src->swizzle[3] = 3;
		}
	} else {
		/* plain register file: relative addressing flag plus the
		 * per-file GPR base offset */
		if (tgsi_src->Register.Indirect)
			r600_src->rel = V_SQ_REL_RELATIVE;
		r600_src->sel = tgsi_src->Register.Index;
		r600_src->sel += ctx->file_offset[tgsi_src->Register.File];
	}
	if (tgsi_src->Register.File == TGSI_FILE_CONSTANT) {
		if (tgsi_src->Register.Dimension) {
			/* 2D constant: Dimension selects the kcache bank */
			r600_src->kc_bank = tgsi_src->Dimension.Index;
			if (tgsi_src->Dimension.Indirect) {
				r600_src->kc_rel = 1;
			}
		}
	}
}

/* Fetch one relatively-addressed constant (4 dwords) from constant buffer
 * cb_idx into dst_reg.xyzw via a vertex fetch, indexing with the AR
 * register channel ar_chan (plus an optional literal offset added first).
 * cb_rel selects relative buffer indexing.  Returns 0 or a negative error. */
static int tgsi_fetch_rel_const(struct r600_shader_ctx *ctx,
				unsigned int cb_idx, unsigned cb_rel, unsigned int offset, unsigned ar_chan,
				unsigned int dst_reg)
{
	struct r600_bytecode_vtx vtx;
	unsigned int ar_reg;
	int r;

	if (offset) {
		/* index = AR + offset, staged in dst_reg so AR stays intact */
		struct r600_bytecode_alu alu;

		memset(&alu, 0, sizeof(alu));

		alu.op = ALU_OP2_ADD_INT;
		alu.src[0].sel = ctx->bc->ar_reg;
		alu.src[0].chan = ar_chan;

		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = offset;

		alu.dst.sel = dst_reg;
		alu.dst.chan = ar_chan;
		alu.dst.write = 1;
		alu.last = 1;

		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		ar_reg = dst_reg;
	} else {
		ar_reg = ctx->bc->ar_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.buffer_id = cb_idx;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = ar_reg;
	vtx.src_sel_x = ar_chan;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = dst_reg;
	vtx.dst_sel_x = 0;		/* SEL_X */
	vtx.dst_sel_y = 1;		/* SEL_Y */
	vtx.dst_sel_z = 2;		/* SEL_Z */
	vtx.dst_sel_w = 3;		/* SEL_W */
	vtx.data_format = FMT_32_32_32_32_FLOAT;
	vtx.num_format_all = 2;		/* NUM_FORMAT_SCALED */
	vtx.format_comp_all = 1;	/* FORMAT_COMP_SIGNED */
	vtx.endian = r600_endian_swap(32);
	vtx.buffer_index_mode = cb_rel;	// cb_rel ? V_SQ_CF_INDEX_0 : V_SQ_CF_INDEX_NONE;

	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
		return r;

	return 0;
}

/* Fetch one GS input (src, possibly with indirect vertex and/or register
 * index) from the ESGS ring into dst_reg.xyzw.  Returns 0 or a negative
 * error code. */
static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
{
	struct r600_bytecode_vtx vtx;
	int r;
	unsigned index = src->Register.Index;
	unsigned vtx_id = src->Dimension.Index;
	int offset_reg = vtx_id / 3;
	int offset_chan = vtx_id % 3;
	int t2 = 0;

	/* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
	 * R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */

	if (offset_reg == 0 && offset_chan == 2)
		offset_chan = 3;

	if (src->Dimension.Indirect || src->Register.Indirect)
		t2 = r600_get_temp(ctx);

	if (src->Dimension.Indirect) {
		int treg[3];
		struct r600_bytecode_alu alu;
		int r, i;
		unsigned addr_reg;
		addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index);
		if (src->DimIndirect.Index > 0) {
			/* relative addressing below goes through AR, so load
			 * the chosen address register into AR first */
			r = single_alu_op2(ctx, ALU_OP1_MOV,
					   ctx->bc->ar_reg, 0,
					   addr_reg, 0,
					   0, 0);
			if (r)
				return r;
		}
		/*
		   we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
		   at least this is what fglrx seems to do. */
		for (i = 0; i < 3; i++) {
			treg[i] = r600_get_temp(ctx);
		}
		r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);

		for (i = 0; i < 3; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			alu.src[0].sel = 0;
			alu.src[0].chan = i == 2 ?
			3 : i;
			alu.dst.sel = treg[i];
			alu.dst.chan = 0;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		/* indirect read from the treg[] array (via AR) into t2.x */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = treg[0];
		alu.src[0].rel = 1;
		alu.dst.sel = t2;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
		offset_reg = t2;
		offset_chan = 0;
	}

	if (src->Register.Indirect) {
		int addr_reg;
		unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID];

		addr_reg = get_address_file_reg(ctx, src->Indirect.Index);

		/* pull the value from index_reg */
		/* t2.x = (first + addr) * 4 + vertex_offset; the *4 scales the
		 * element index — NOTE(review): unit appears to be ring dwords
		 * relative to vtx.offset (bytes) below; confirm */
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   t2, 1,
				   addr_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, first);
		if (r)
			return r;
		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
				   t2, 0,
				   t2, 1,
				   V_SQ_ALU_SRC_LITERAL, 4,
				   offset_reg, offset_chan);
		if (r)
			return r;
		offset_reg = t2;
		offset_chan = 0;
		index = src->Register.Index - first;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = offset_reg;
	vtx.src_sel_x = offset_chan;
	vtx.offset = index * 16; /*bytes*/
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = dst_reg;
	vtx.dst_sel_x = 0;		/* SEL_X */
	vtx.dst_sel_y = 1;		/* SEL_Y */
	vtx.dst_sel_z = 2;		/* SEL_Z */
	vtx.dst_sel_w = 3;		/* SEL_W */
	if (ctx->bc->chip_class >= EVERGREEN) {
		vtx.use_const_fields = 1;
	} else {
		vtx.data_format = FMT_32_32_32_32_FLOAT;
	}

	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
		return r;

	return 0;
}

/* Rewrite the current instruction's GS INPUT sources: PrimitiveID comes
 * from R0.z, and any per-vertex (2D) input is fetched from the ESGS ring
 * into a temp which then replaces the source register. */
static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	unsigned i;

	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
		struct tgsi_full_src_register *src = &inst->Src[i];

		if (src->Register.File == TGSI_FILE_INPUT) {
			if (ctx->shader->input[src->Register.Index].name == TGSI_SEMANTIC_PRIMID) {
				/* primitive id is in R0.z */
				ctx->src[i].sel = 0;
				ctx->src[i].swizzle[0] = 2;
			}
		}
		if (src->Register.File == TGSI_FILE_INPUT && src->Register.Dimension) {
			int treg = r600_get_temp(ctx);

			fetch_gs_input(ctx, src, treg);
			ctx->src[i].sel = treg;
			ctx->src[i].rel = 0;
		}
	}
	return 0;
}


/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS(HS) inputs
 * TCS(HS) outputs = TES(DS) inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
/* this will return with the dw address in temp_reg.x */
/* Compute the LDS byte address of a TGSI input/output register (dst OR
 * src — exactly one is non-NULL) and accumulate it into temp_reg.x:
 *   temp.x += vertex_index * stride (for 2D regs)
 *           + relative_index * 16  (for indirect regs)
 *           + lds_unique_index * 16
 * Returns 0 or a negative error code; -1 for unsupported register files. */
static int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg,
				 const struct tgsi_full_dst_register *dst,
				 const struct tgsi_full_src_register *src,
				 int stride_bytes_reg, int stride_bytes_chan)
{
	struct tgsi_full_dst_register reg;
	ubyte *name, *index, *array_first;
	int r;
	int param;
	struct tgsi_shader_info *info = &ctx->info;
	/* Set the register description. The address computation is the same
	 * for sources and destinations. */
	if (src) {
		reg.Register.File = src->Register.File;
		reg.Register.Index = src->Register.Index;
		reg.Register.Indirect = src->Register.Indirect;
		reg.Register.Dimension = src->Register.Dimension;
		reg.Indirect = src->Indirect;
		reg.Dimension = src->Dimension;
		reg.DimIndirect = src->DimIndirect;
	} else
		reg = *dst;

	/* If the register is 2-dimensional (e.g. an array of vertices
	 * in a primitive), calculate the base address of the vertex. */
	if (reg.Register.Dimension) {
		int sel, chan;
		if (reg.Dimension.Indirect) {
			unsigned addr_reg;
			assert (reg.DimIndirect.File == TGSI_FILE_ADDRESS);

			addr_reg = get_address_file_reg(ctx, reg.DimIndirect.Index);
			/* pull the value from index_reg */
			sel = addr_reg;
			chan = 0;
		} else {
			sel = V_SQ_ALU_SRC_LITERAL;
			chan = reg.Dimension.Index;
		}

		/* temp.x = stride * vertex_index + temp.x */
		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
				   temp_reg, 0,
				   stride_bytes_reg, stride_bytes_chan,
				   sel, chan,
				   temp_reg, 0);
		if (r)
			return r;
	}

	if (reg.Register.File == TGSI_FILE_INPUT) {
		name = info->input_semantic_name;
		index = info->input_semantic_index;
		array_first = info->input_array_first;
	} else if (reg.Register.File == TGSI_FILE_OUTPUT) {
		name = info->output_semantic_name;
		index = info->output_semantic_index;
		array_first = info->output_array_first;
	} else {
		assert(0);
		return -1;
	}
	if (reg.Register.Indirect) {
		int addr_reg;
		int first;
		/* Add the relative address of the element. */
		if (reg.Indirect.ArrayID)
			first = array_first[reg.Indirect.ArrayID];
		else
			first = reg.Register.Index;

		addr_reg = get_address_file_reg(ctx, reg.Indirect.Index);

		/* pull the value from index_reg */
		/* temp.x = 16 * rel_index + temp.x  (16 bytes per vec4 slot) */
		r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 16,
				   addr_reg, 0,
				   temp_reg, 0);
		if (r)
			return r;

		param = r600_get_lds_unique_index(name[first],
						  index[first]);

	} else {
		param = r600_get_lds_unique_index(name[reg.Register.Index],
						  index[reg.Register.Index]);
	}

	/* add to base_addr - passed in temp_reg.x */
	if (param) {
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, 0,
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, param * 16);
		if (r)
			return r;

	}
	return 0;
}

/* Read 16 bytes from LDS at the byte address held in temp_reg.x into
 * dst_reg.xyzw: build the four dword addresses in temp_reg.xyzw, issue
 * four LDS_READ_RET ops, then pop the four results from the LDS output
 * queue.  Returns 0 or a negative error code. */
static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg,
			       unsigned dst_reg)
{
	struct r600_bytecode_alu alu;
	int r, i;

	/* start a fresh CF clause when the current one is nearly full, so
	 * the reads and the matching queue pops stay together —
	 * NOTE(review): assumes cf_last is non-NULL here (callers emit ALU
	 * ops first); confirm */
	if ((ctx->bc->cf_last->ndw>>1) >= 0x60)
		ctx->bc->force_add_cf = 1;
	for (i = 1; i < 4; i++) {
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, i,
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 4 * i);
		if (r)
			return r;
	}
	for (i = 0; i < 4; i++) {
		/* emit an LDS_READ_RET */
		memset(&alu, 0, sizeof(alu));
		alu.op = LDS_OP1_LDS_READ_RET;
		alu.src[0].sel = temp_reg;
		alu.src[0].chan = i;
		alu.src[1].sel = V_SQ_ALU_SRC_0;
		alu.src[2].sel = V_SQ_ALU_SRC_0;
		alu.dst.chan = 0;
		alu.is_lds_idx_op = true;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	for (i = 0; i < 4; i++) {
		/* then read from LDS_OQ_A_POP */
		memset(&alu, 0, sizeof(alu));

		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
		alu.src[0].chan = 0;
		alu.dst.sel = dst_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Load one TES input from LDS (TCS outputs) into dst_reg.xyzw.
 * Patch-level data uses the per-patch base (no Dimension), per-vertex
 * data the regular output base; see get_lds_offset0. */
static int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
{
	int r;
	unsigned temp_reg = r600_get_temp(ctx);

	r = get_lds_offset0(ctx, 2, temp_reg,
			    src->Register.Dimension ? false : true);
	if (r)
		return r;

	/* the base address is now in temp.x */
	r = r600_get_byte_address(ctx, temp_reg,
				  NULL, src, ctx->tess_output_info, 1);
	if (r)
		return r;

	r = do_lds_fetch_values(ctx, temp_reg, dst_reg);
	if (r)
		return r;
	return 0;
}

/* Load one TCS input from LDS (LS outputs) into dst_reg.xyzw. */
static int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
{
	int r;
	unsigned temp_reg = r600_get_temp(ctx);

	/* t.x = ips * r0.y */
	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
			   temp_reg, 0,
			   ctx->tess_input_info, 0,
			   0, 1);

	if (r)
		return r;

	/* the base address is now in temp.x */
	r = r600_get_byte_address(ctx, temp_reg,
				  NULL, src, ctx->tess_input_info, 1);
	if (r)
		return r;

	r = do_lds_fetch_values(ctx, temp_reg, dst_reg);
	if (r)
		return r;
	return 0;
}

/* Read back one of the TCS's own outputs from LDS into dst_reg.xyzw. */
static int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg)
{
	int r;
	unsigned temp_reg = r600_get_temp(ctx);

	r = get_lds_offset0(ctx, 1, temp_reg,
			    src->Register.Dimension ? false : true);
	if (r)
		return r;
	/* the base address is now in temp.x */
	r = r600_get_byte_address(ctx, temp_reg,
				  NULL, src,
				  ctx->tess_output_info, 1);
	if (r)
		return r;

	r = do_lds_fetch_values(ctx, temp_reg, dst_reg);
	if (r)
		return r;
	return 0;
}

/* For TCS/TES instructions, replace LDS-resident sources (TES inputs,
 * TCS inputs, TCS outputs read back) with temps filled by the fetch
 * helpers above. */
static int tgsi_split_lds_inputs(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	unsigned i;

	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
		struct tgsi_full_src_register *src = &inst->Src[i];

		if (ctx->type == PIPE_SHADER_TESS_EVAL && src->Register.File == TGSI_FILE_INPUT) {
			int treg = r600_get_temp(ctx);
			fetch_tes_input(ctx, src, treg);
			ctx->src[i].sel = treg;
			ctx->src[i].rel = 0;
		}
		if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_INPUT) {
			int treg = r600_get_temp(ctx);
			fetch_tcs_input(ctx, src, treg);
			ctx->src[i].sel = treg;
			ctx->src[i].rel = 0;
		}
		if (ctx->type == PIPE_SHADER_TESS_CTRL && src->Register.File == TGSI_FILE_OUTPUT) {
			int treg = r600_get_temp(ctx);
			fetch_tcs_output(ctx, src, treg);
			ctx->src[i].sel = treg;
			ctx->src[i].rel = 0;
		}
	}
	return 0;
}

/* Translate all sources of the current instruction (via tgsi_src) and
 * move extra constant-file sources into temps, leaving at most one in
 * place — NOTE(review): presumably because one ALU instruction cannot
 * read from multiple constant locations; confirm against HW docs.
 * Relatively-addressed constants are always fetched into a temp. */
static int tgsi_split_constant(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, nconst, r;

	for (i = 0, nconst = 0; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT) {
			nconst++;
		}
		tgsi_src(ctx, &inst->Src[i], &ctx->src[i]);
	}
	for (i = 0, j = nconst - 1; i < inst->Instruction.NumSrcRegs; i++) {
		if (inst->Src[i].Register.File != TGSI_FILE_CONSTANT) {
			continue;
		}

		if (ctx->src[i].rel) {
			int chan = inst->Src[i].Indirect.Swizzle;
int treg = r600_get_temp(ctx); 1899 if ((r = tgsi_fetch_rel_const(ctx, ctx->src[i].kc_bank, ctx->src[i].kc_rel, ctx->src[i].sel - 512, chan, treg))) 1900 return r; 1901 1902 ctx->src[i].kc_bank = 0; 1903 ctx->src[i].kc_rel = 0; 1904 ctx->src[i].sel = treg; 1905 ctx->src[i].rel = 0; 1906 j--; 1907 } else if (j > 0) { 1908 int treg = r600_get_temp(ctx); 1909 for (k = 0; k < 4; k++) { 1910 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1911 alu.op = ALU_OP1_MOV; 1912 alu.src[0].sel = ctx->src[i].sel; 1913 alu.src[0].chan = k; 1914 alu.src[0].rel = ctx->src[i].rel; 1915 alu.src[0].kc_bank = ctx->src[i].kc_bank; 1916 alu.src[0].kc_rel = ctx->src[i].kc_rel; 1917 alu.dst.sel = treg; 1918 alu.dst.chan = k; 1919 alu.dst.write = 1; 1920 if (k == 3) 1921 alu.last = 1; 1922 r = r600_bytecode_add_alu(ctx->bc, &alu); 1923 if (r) 1924 return r; 1925 } 1926 ctx->src[i].sel = treg; 1927 ctx->src[i].rel =0; 1928 j--; 1929 } 1930 } 1931 return 0; 1932 } 1933 1934 /* need to move any immediate into a temp - for trig functions which use literal for PI stuff */ 1935 static int tgsi_split_literal_constant(struct r600_shader_ctx *ctx) 1936 { 1937 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 1938 struct r600_bytecode_alu alu; 1939 int i, j, k, nliteral, r; 1940 1941 for (i = 0, nliteral = 0; i < inst->Instruction.NumSrcRegs; i++) { 1942 if (ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1943 nliteral++; 1944 } 1945 } 1946 for (i = 0, j = nliteral - 1; i < inst->Instruction.NumSrcRegs; i++) { 1947 if (j > 0 && ctx->src[i].sel == V_SQ_ALU_SRC_LITERAL) { 1948 int treg = r600_get_temp(ctx); 1949 for (k = 0; k < 4; k++) { 1950 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 1951 alu.op = ALU_OP1_MOV; 1952 alu.src[0].sel = ctx->src[i].sel; 1953 alu.src[0].chan = k; 1954 alu.src[0].value = ctx->src[i].value[k]; 1955 alu.dst.sel = treg; 1956 alu.dst.chan = k; 1957 alu.dst.write = 1; 1958 if (k == 3) 1959 alu.last = 1; 1960 r = r600_bytecode_add_alu(ctx->bc, &alu); 
				if (r)
					return r;
			}
			/* Redirect the source at the temp copy. */
			ctx->src[i].sel = treg;
			j--;
		}
	}
	return 0;
}

/* For two-sided lighting in the fragment shader: for every COLOR input,
 * select between the front and back color based on facing. */
static int process_twoside_color_inputs(struct r600_shader_ctx *ctx)
{
	int i, r, count = ctx->shader->ninput;

	for (i = 0; i < count; i++) {
		if (ctx->shader->input[i].name == TGSI_SEMANTIC_COLOR) {
			r = select_twoside_color(ctx, i, ctx->shader->input[i].back_color_input);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Emit MEM_STREAM CF instructions that write the shader outputs listed in
 * *so to the stream-output (transform feedback) buffers.
 *
 * stream: restricts emission (see the filter below); -1 emits everything.
 * stream_item_size: declared but see NOTE(review) below — it is not written
 * in the visible code; confirm whether callers rely on it being updated.
 * Returns 0 on success or a negative errno. */
static int emit_streamout(struct r600_shader_ctx *ctx, struct pipe_stream_output_info *so,
			  int stream, unsigned *stream_item_size)
{
	unsigned so_gpr[PIPE_MAX_SHADER_OUTPUTS];
	unsigned start_comp[PIPE_MAX_SHADER_OUTPUTS];
	int i, j, r;

	/* Sanity checking. */
	if (so->num_outputs > PIPE_MAX_SO_OUTPUTS) {
		R600_ERR("Too many stream outputs: %d\n", so->num_outputs);
		r = -EINVAL;
		goto out_err;
	}
	for (i = 0; i < so->num_outputs; i++) {
		if (so->output[i].output_buffer >= 4) {
			R600_ERR("Exceeded the max number of stream output buffers, got: %d\n",
				 so->output[i].output_buffer);
			r = -EINVAL;
			goto out_err;
		}
	}

	/* Initialize locations where the outputs are stored. */
	for (i = 0; i < so->num_outputs; i++) {

		so_gpr[i] = ctx->shader->output[so->output[i].register_index].gpr;
		start_comp[i] = so->output[i].start_component;
		/* Lower outputs with dst_offset < start_component.
		 *
		 * We can only output 4D vectors with a write mask, e.g. we can
		 * only output the W component at offset 3, etc. If we want
		 * to store Y, Z, or W at buffer offset 0, we need to use MOV
		 * to move it to X and output X.
		 */
		if (so->output[i].dst_offset < so->output[i].start_component) {
			unsigned tmp = r600_get_temp(ctx);

			for (j = 0; j < so->output[i].num_components; j++) {
				struct r600_bytecode_alu alu;
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_MOV;
				alu.src[0].sel = so_gpr[i];
				alu.src[0].chan = so->output[i].start_component + j;

				alu.dst.sel = tmp;
				alu.dst.chan = j;
				alu.dst.write = 1;
				if (j == so->output[i].num_components - 1)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
			start_comp[i] = 0;
			so_gpr[i] = tmp;
		}
	}

	/* Write outputs to buffers. */
	for (i = 0; i < so->num_outputs; i++) {
		struct r600_bytecode_output output;

		/* NOTE(review): this filter compares the 'stream' argument
		 * against output_buffer, not against so->output[i].stream —
		 * looks intentional for how the GS copy shader drives it per
		 * ring, but confirm against callers before changing. */
		if (stream != -1 && stream != so->output[i].output_buffer)
			continue;

		memset(&output, 0, sizeof(struct r600_bytecode_output));
		output.gpr = so_gpr[i];
		output.elem_size = so->output[i].num_components - 1;
		if (output.elem_size == 2)
			output.elem_size = 3; // 3 not supported, write 4 with junk at end
		output.array_base = so->output[i].dst_offset - start_comp[i];
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
		output.burst_count = 1;
		/* array_size is an upper limit for the burst_count
		 * with MEM_STREAM instructions */
		output.array_size = 0xFFF;
		output.comp_mask = ((1 << so->output[i].num_components) - 1) << start_comp[i];

		if (ctx->bc->chip_class >= EVERGREEN) {
			/* Evergreen+: one CF opcode per (stream, buffer) pair;
			 * pick the buffer, then bias by stream * 4. */
			switch (so->output[i].output_buffer) {
			case 0:
				output.op = CF_OP_MEM_STREAM0_BUF0;
				break;
			case 1:
				output.op = CF_OP_MEM_STREAM0_BUF1;
				break;
			case 2:
				output.op = CF_OP_MEM_STREAM0_BUF2;
				break;
			case 3:
				output.op = CF_OP_MEM_STREAM0_BUF3;
				break;
			}
			output.op += so->output[i].stream * 4;
			assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <=
			       CF_OP_MEM_STREAM3_BUF3);
			/* Track which (buffer, stream) slots are live; 4 buffer
			 * bits per stream. */
			ctx->enabled_stream_buffers_mask |= (1 << so->output[i].output_buffer) << so->output[i].stream * 4;
		} else {
			/* Pre-Evergreen: one opcode per buffer, stream 0 only. */
			switch (so->output[i].output_buffer) {
			case 0:
				output.op = CF_OP_MEM_STREAM0;
				break;
			case 1:
				output.op = CF_OP_MEM_STREAM1;
				break;
			case 2:
				output.op = CF_OP_MEM_STREAM2;
				break;
			case 3:
				output.op = CF_OP_MEM_STREAM3;
				break;
			}
			ctx->enabled_stream_buffers_mask |= 1 << so->output[i].output_buffer;
		}
		r = r600_bytecode_add_output(ctx->bc, &output);
		if (r)
			goto out_err;
	}
	return 0;
out_err:
	return r;
}

/* The VS edge-flag output arrives as a float; clamp it to [0,1] and convert
 * to int in place so the hardware sees 0/1. */
static void convert_edgeflag_to_int(struct r600_shader_ctx *ctx)
{
	struct r600_bytecode_alu alu;
	unsigned reg;

	if (!ctx->shader->vs_out_edgeflag)
		return;

	reg = ctx->shader->output[ctx->edgeflag_output].gpr;

	/* clamp(x, 0, 1) */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = reg;
	alu.dst.sel = reg;
	alu.dst.write = 1;
	alu.dst.clamp = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx->bc, &alu);

	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP1_FLT_TO_INT;
	alu.src[0].sel = reg;
	alu.dst.sel = reg;
	alu.dst.write = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx->bc, &alu);
}

/* Build the "GS copy shader": a VS-typed shader that reads the vertices the
 * geometry shader wrote to the GSVS ring, performs stream-output writes per
 * vertex stream, and exports position/params like a regular vertex shader.
 * Returns the result of r600_bytecode_build(), or 0 if allocation of the
 * pipe shader fails (NOTE(review): 0 on calloc failure looks like success to
 * a caller checking for non-zero errors — confirm callers handle this). */
static int generate_gs_copy_shader(struct r600_context *rctx,
				   struct r600_pipe_shader *gs,
				   struct pipe_stream_output_info *so)
{
	struct r600_shader_ctx ctx = {};
	struct r600_shader *gs_shader = &gs->shader;
	struct r600_pipe_shader *cshader;
	int ocnt = gs_shader->noutput;
	struct r600_bytecode_alu alu;
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_output output;
	struct r600_bytecode_cf *cf_jump, *cf_pop,
		*last_exp_pos = NULL, *last_exp_param = NULL;
	/* POS exports: 60 = position, 61 = misc (psize/layer/viewport),
	 * clip distances start right after. */
	int i, j, next_clip_pos = 61, next_param = 0;
	int ring;
	bool only_ring_0 = true;
	cshader = calloc(1, sizeof(struct r600_pipe_shader));
	if (!cshader)
		return 0;

	/* The copy shader re-exports exactly the GS outputs. */
	memcpy(cshader->shader.output, gs_shader->output, ocnt *
	       sizeof(struct r600_shader_io));

	cshader->shader.noutput = ocnt;

	ctx.shader = &cshader->shader;
	ctx.bc = &ctx.shader->bc;
	ctx.type = ctx.bc->type = PIPE_SHADER_VERTEX;

	r600_bytecode_init(ctx.bc, rctx->b.chip_class, rctx->b.family,
			   rctx->screen->has_compressed_msaa_texturing);

	ctx.bc->isa = rctx->isa;

	cf_jump = NULL;
	memset(cshader->shader.ring_item_sizes, 0, sizeof(cshader->shader.ring_item_sizes));

	/* R0.x = R0.x & 0x3fffffff
	 * (mask off the stream id packed into the top bits of the ring
	 * offset — presumably; see the >> 30 below) */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_AND_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x3fffffff;
	alu.dst.write = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* R0.y = R0.x >> 30 : extract the stream id */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_LSHR_INT;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 0x1e;
	alu.dst.chan = 1;
	alu.dst.write = 1;
	alu.last = 1;
	r600_bytecode_add_alu(ctx.bc, &alu);

	/* fetch vertex data from GSVS ring: one vec4 VFETCH per output,
	 * landing in consecutive GPRs starting at r1 */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];

		out->gpr = i + 1;
		out->ring_offset = i * 16;

		memset(&vtx, 0, sizeof(vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.offset = out->ring_offset;
		vtx.dst_gpr = out->gpr;
		vtx.src_gpr = 0;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		if (rctx->b.chip_class >= EVERGREEN) {
			vtx.use_const_fields = 1;
		} else {
			vtx.data_format = FMT_32_32_32_32_FLOAT;
		}

		r600_bytecode_add_vtx(ctx.bc, &vtx);
	}
	ctx.temp_reg = i + 1;

	/* One predicated section per vertex stream (ring), highest first so
	 * stream 0 — which also does the position/param exports below —
	 * comes last and falls through to them. */
	for (ring = 3; ring >= 0; --ring) {
		bool enabled = false;
		for (i = 0; i < so->num_outputs; i++) {
			if (so->output[i].stream == ring) {
				enabled = true;
				if (ring > 0)
					only_ring_0 = false;
				break;
			}
		}
		if (ring != 0 && !enabled) {
			cshader->shader.ring_item_sizes[ring] = 0;
			continue;
		}

		if (cf_jump) {
			// Patch up jump label
			r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
			cf_pop = ctx.bc->cf_last;

			cf_jump->cf_addr = cf_pop->id + 2;
			cf_jump->pop_count = 1;
			cf_pop->cf_addr = cf_pop->id + 2;
			cf_pop->pop_count = 1;
		}

		/* PRED_SETE_INT __, R0.y, ring : only run this section for
		 * vertices belonging to this stream */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP2_PRED_SETE_INT;
		alu.src[0].chan = 1;
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = ring;
		alu.execute_mask = 1;
		alu.update_pred = 1;
		alu.last = 1;
		r600_bytecode_add_alu_type(ctx.bc, &alu, CF_OP_ALU_PUSH_BEFORE);

		r600_bytecode_add_cfinst(ctx.bc, CF_OP_JUMP);
		cf_jump = ctx.bc->cf_last;

		if (enabled)
			emit_streamout(&ctx, so, only_ring_0 ? -1 : ring, &cshader->shader.ring_item_sizes[ring]);
		cshader->shader.ring_item_sizes[ring] = ocnt * 16;
	}

	/* bc adds nops - copy it */
	if (ctx.bc->chip_class == R600) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP0_NOP;
		alu.last = 1;
		r600_bytecode_add_alu(ctx.bc, &alu);

		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
	}

	/* export vertex data */
	/* XXX factor out common code with r600_shader_from_tgsi ?
	 */
	for (i = 0; i < ocnt; ++i) {
		struct r600_shader_io *out = &ctx.shader->output[i];
		bool instream0 = true;
		if (out->name == TGSI_SEMANTIC_CLIPVERTEX)
			continue;

		/* Only rasterize/export outputs that belong to stream 0. */
		for (j = 0; j < so->num_outputs; j++) {
			if (so->output[j].register_index == i) {
				if (so->output[j].stream == 0)
					break;
				if (so->output[j].stream > 0)
					instream0 = false;
			}
		}
		if (!instream0)
			continue;
		memset(&output, 0, sizeof(output));
		output.gpr = out->gpr;
		output.elem_size = 3;
		output.swizzle_x = 0;
		output.swizzle_y = 1;
		output.swizzle_z = 2;
		output.swizzle_w = 3;
		output.burst_count = 1;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		output.op = CF_OP_EXPORT;
		switch (out->name) {
		case TGSI_SEMANTIC_POSITION:
			output.array_base = 60;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;

		case TGSI_SEMANTIC_PSIZE:
			/* psize/layer/viewport all share POS slot 61; swizzle
			 * 7 masks the unused channels. */
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_point_size = 1;
			break;
		case TGSI_SEMANTIC_LAYER:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 0;
			output.swizzle_w = 7;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_layer = 1;
			break;
		case TGSI_SEMANTIC_VIEWPORT_INDEX:
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = 61;
			if (next_clip_pos == 61)
				next_clip_pos = 62;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			ctx.shader->vs_out_misc_write = 1;
			ctx.shader->vs_out_viewport = 1;
			output.swizzle_x = 7;
			output.swizzle_y = 7;
			output.swizzle_z = 7;
			output.swizzle_w = 0;
			break;
		case TGSI_SEMANTIC_CLIPDIST:
			/* spi_sid is 0 for clipdistance outputs that were generated
			 * for clipvertex - we don't need to pass them to PS */
			ctx.shader->clip_dist_write = gs->shader.clip_dist_write;
			if (out->spi_sid) {
				/* duplicate it as PARAM to pass to the pixel shader */
				output.array_base = next_param++;
				r600_bytecode_add_output(ctx.bc, &output);
				last_exp_param = ctx.bc->cf_last;
			}
			output.array_base = next_clip_pos++;
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
			break;
		case TGSI_SEMANTIC_FOG:
			output.swizzle_y = 4; /* 0 */
			output.swizzle_z = 4; /* 0 */
			output.swizzle_w = 5; /* 1 */
			break;
		default:
			output.array_base = next_param++;
			break;
		}
		r600_bytecode_add_output(ctx.bc, &output);
		if (output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM)
			last_exp_param = ctx.bc->cf_last;
		else
			last_exp_pos = ctx.bc->cf_last;
	}

	/* The hardware needs at least one POS and one PARAM export to mark
	 * DONE on; emit fully-masked dummies if none were produced. */
	if (!last_exp_pos) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		output.type = 2;
		output.op = CF_OP_EXPORT;
		output.array_base = 60;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_pos = ctx.bc->cf_last;
	}

	if (!last_exp_param) {
		memset(&output, 0, sizeof(output));
		output.gpr = 0;
		output.elem_size = 3;
		output.swizzle_x = 7;
		output.swizzle_y = 7;
		output.swizzle_z = 7;
		output.swizzle_w = 7;
		output.burst_count = 1;
		output.type = 2; /* overwritten just below; kept as-is */
		output.op = CF_OP_EXPORT;
		output.array_base = next_param++;
		output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM;
		r600_bytecode_add_output(ctx.bc, &output);
		last_exp_param = ctx.bc->cf_last;
	}

	last_exp_pos->op = CF_OP_EXPORT_DONE;
	last_exp_param->op = CF_OP_EXPORT_DONE;

	/* Close the last predicated stream section. */
	r600_bytecode_add_cfinst(ctx.bc, CF_OP_POP);
	cf_pop = ctx.bc->cf_last;

	cf_jump->cf_addr = cf_pop->id + 2;
	cf_jump->pop_count = 1;
	cf_pop->cf_addr = cf_pop->id + 2;
	cf_pop->pop_count = 1;

	if (ctx.bc->chip_class == CAYMAN)
		cm_bytecode_add_cf_end(ctx.bc);
	else {
		r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
		ctx.bc->cf_last->end_of_program = 1;
	}

	gs->gs_copy_shader = cshader;
	cshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask;

	ctx.bc->nstack = 1;

	return r600_bytecode_build(ctx.bc);
}

/* After an emitted vertex, advance the per-stream GSVS ring write pointer
 * (kept in a GPR) by one vertex worth of dwords.  No-op for non-indirect
 * (compile-time) addressing. */
static int emit_inc_ring_offset(struct r600_shader_ctx *ctx, int idx, bool ind)
{
	if (ind) {
		struct r600_bytecode_alu alu;
		int r;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD_INT;
		alu.src[0].sel = ctx->gs_export_gpr_tregs[idx];
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		/* >> 4: ring offset is kept in 16-byte (vec4) units here */
		alu.src[1].value = ctx->gs_out_ring_offset >> 4;
		alu.dst.sel = ctx->gs_export_gpr_tregs[idx];
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Write the current vertex's outputs to the GS output ring (MEM_RING CF
 * instructions), either for an ES feeding a GS (offsets looked up from the
 * GS inputs) or for the GS itself (sequential offsets).  stream selects the
 * ring opcode; ind selects indirect (GPR-indexed) vs absolute addressing. */
static int emit_gs_ring_writes(struct r600_shader_ctx *ctx, const struct pipe_stream_output_info *so, int stream, bool ind)
{
	struct r600_bytecode_output output;
	int i, k, ring_offset;
	int effective_stream = stream == -1 ? 0 : stream;
	int idx = 0;

	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->gs_for_vs) {
			/* for ES we need to lookup corresponding ring offset expected by GS
			 * (map this output to GS input by name and sid) */
			/* FIXME precompute offsets */
			ring_offset = -1;
			for (k = 0; k < ctx->gs_for_vs->ninput; ++k) {
				struct r600_shader_io *in = &ctx->gs_for_vs->input[k];
				struct r600_shader_io *out = &ctx->shader->output[i];
				if (in->name == out->name && in->sid == out->sid)
					ring_offset = in->ring_offset;
			}

			/* Output not consumed by the GS: skip it. */
			if (ring_offset == -1)
				continue;
		} else {
			ring_offset = idx * 16;
			idx++;
		}

		/* POSITION only goes to stream 0. */
		if (stream > 0 && ctx->shader->output[i].name == TGSI_SEMANTIC_POSITION)
			continue;
		/* next_ring_offset after parsing input decls contains total size of
		 * single vertex data, gs_next_vertex - current vertex index */
		if (!ind)
			ring_offset += ctx->gs_out_ring_offset * ctx->gs_next_vertex;

		memset(&output, 0, sizeof(struct r600_bytecode_output));
		output.gpr = ctx->shader->output[i].gpr;
		output.elem_size = 3;
		output.comp_mask = 0xF;
		output.burst_count = 1;

		if (ind)
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
		else
			output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;

		switch (stream) {
		default:
		case 0:
			output.op = CF_OP_MEM_RING; break;
		case 1:
			output.op = CF_OP_MEM_RING1; break;
		case 2:
			output.op = CF_OP_MEM_RING2; break;
		case 3:
			output.op = CF_OP_MEM_RING3; break;
		}

		if (ind) {
			output.array_base = ring_offset >> 2; /* in dwords */
			output.array_size = 0xfff;
			output.index_gpr = ctx->gs_export_gpr_tregs[effective_stream];
		} else
			output.array_base = ring_offset >> 2; /* in dwords */
		r600_bytecode_add_output(ctx->bc, &output);
	}

	++ctx->gs_next_vertex;
	return 0;
}


/* Fetch the tessellation layout descriptors (strides/counts packed by the
 * state tracker into R600_LDS_INFO_CONST_BUFFER) into the reserved GPRs
 * ctx->tess_input_info (dwords 0-3) and/or ctx->tess_output_info
 * (dwords 4-7), whichever the current shader stage needs. */
static int r600_fetch_tess_io_info(struct r600_shader_ctx *ctx)
{
	int r;
	struct r600_bytecode_vtx vtx;
	int temp_val = ctx->temp_reg;
	/* need to store the TCS output somewhere */
	/* temp.x = 0: fetch index for the VFETCHes below */
	r = single_alu_op2(ctx, ALU_OP1_MOV,
			   temp_val, 0,
			   V_SQ_ALU_SRC_LITERAL, 0,
			   0, 0);
	if (r)
		return r;

	/* used by VS/TCS */
	if (ctx->tess_input_info) {
		/* fetch tcs input values into resv space */
		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.data_format = FMT_32_32_32_32;
		vtx.num_format_all = 2;
		vtx.format_comp_all = 1;
		vtx.use_const_fields = 0;
		vtx.endian = r600_endian_swap(32);
		vtx.srf_mode_all = 1;
		vtx.offset = 0;
		vtx.dst_gpr = ctx->tess_input_info;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		vtx.src_gpr = temp_val;
		vtx.src_sel_x = 0;

		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
		if (r)
			return r;
	}

	/* used by TCS/TES */
	if (ctx->tess_output_info) {
		/* fetch tcs output values into resv space; second vec4 of the
		 * same constant buffer (offset 16 bytes) */
		memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
		vtx.op = FETCH_OP_VFETCH;
		vtx.buffer_id = R600_LDS_INFO_CONST_BUFFER;
		vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
		vtx.mega_fetch_count = 16;
		vtx.data_format = FMT_32_32_32_32;
		vtx.num_format_all = 2;
		vtx.format_comp_all = 1;
		vtx.use_const_fields = 0;
		vtx.endian = r600_endian_swap(32);
		vtx.srf_mode_all = 1;
		vtx.offset = 16;
		vtx.dst_gpr = ctx->tess_output_info;
		vtx.dst_sel_x = 0;
		vtx.dst_sel_y = 1;
		vtx.dst_sel_z = 2;
		vtx.dst_sel_w = 3;
		vtx.src_gpr = temp_val;
		vtx.src_sel_x = 0;

		r = r600_bytecode_add_vtx(ctx->bc, &vtx);
		if (r)
			return r;
	}
	return 0;
}

/* VS-as-LS: store every VS output into LDS for the TCS to read, two
 * channels per LDS_WRITE_REL (xy then zw). */
static int emit_lds_vs_writes(struct r600_shader_ctx *ctx)
{
	int i, j, r;
	int temp_reg;

	/* fetch tcs input values into input_vals */
	ctx->tess_input_info = r600_get_temp(ctx);
	ctx->tess_output_info = 0;
	r = r600_fetch_tess_io_info(ctx);
	if (r)
		return r;

	temp_reg = r600_get_temp(ctx);
	/* dst reg contains LDS address stride * idx */
	/* MUL vertexID, vertex_dw_stride */
	r = single_alu_op2(ctx, ALU_OP2_MUL_UINT24,
			   temp_reg, 0,
			   ctx->tess_input_info, 1,
			   0, 1); /* rel id in r0.y? */
	if (r)
		return r;

	for (i = 0; i < ctx->shader->noutput; i++) {
		struct r600_bytecode_alu alu;
		int param = r600_get_lds_unique_index(ctx->shader->output[i].name, ctx->shader->output[i].sid);

		/* temp.y = base + param * 16 (byte offset of this output) */
		if (param) {
			r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
					   temp_reg, 1,
					   temp_reg, 0,
					   V_SQ_ALU_SRC_LITERAL, param * 16);
			if (r)
				return r;
		}

		/* temp.z = address of the zw half (8 bytes further) */
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, 2,
				   temp_reg, param ? 1 : 0,
				   V_SQ_ALU_SRC_LITERAL, 8);
		if (r)
			return r;


		/* j == 0 writes .xy at temp.y (or temp.x when param == 0),
		 * j == 1 writes .zw at temp.z */
		for (j = 0; j < 2; j++) {
			int chan = (j == 1) ? 2 : (param ?
						   1 : 0);
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = LDS_OP3_LDS_WRITE_REL;
			alu.src[0].sel = temp_reg;
			alu.src[0].chan = chan;
			alu.src[1].sel = ctx->shader->output[i].gpr;
			alu.src[1].chan = j * 2;
			alu.src[2].sel = ctx->shader->output[i].gpr;
			alu.src[2].chan = (j * 2) + 1;
			/* WRITE_REL stores src1 at the address and src2 at
			 * address + 4 (lds_idx = 1) */
			alu.last = 1;
			alu.dst.chan = 0;
			alu.lds_idx = 1;
			alu.is_lds_idx_op = true;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}

/* Translate a TCS store to a TGSI OUTPUT into LDS writes: compute the byte
 * address of the destination, then write the enabled channels, pairing
 * adjacent channels into one LDS_WRITE_REL where the write mask allows. */
static int r600_store_tcs_output(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	const struct tgsi_full_dst_register *dst = &inst->Dst[0];
	int i, r, lasti;
	int temp_reg = r600_get_temp(ctx);
	struct r600_bytecode_alu alu;
	unsigned write_mask = dst->Register.WriteMask;

	if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
		return 0;

	/* Non-dimensioned destinations are per-patch outputs. */
	r = get_lds_offset0(ctx, 1, temp_reg, dst->Register.Dimension ?
			    false : true);
	if (r)
		return r;

	/* the base address is now in temp.x */
	r = r600_get_byte_address(ctx, temp_reg,
				  &inst->Dst[0], NULL, ctx->tess_output_info, 1);
	if (r)
		return r;

	/* LDS write */
	lasti = tgsi_last_instruction(write_mask);
	/* Precompute per-channel addresses temp.i = temp.x + 4*i
	 * (i starts at 1; channel 0 uses temp.x directly). */
	for (i = 1; i <= lasti; i++) {

		if (!(write_mask & (1 << i)))
			continue;
		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   temp_reg, i,
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 4 * i);
		if (r)
			return r;
	}

	for (i = 0; i <= lasti; i++) {
		if (!(write_mask & (1 << i)))
			continue;

		/* Channels xy or zw both enabled: one LDS_WRITE_REL covers
		 * the pair (second value goes to address + 4). */
		if ((i == 0 && ((write_mask & 3) == 3)) ||
		    (i == 2 && ((write_mask & 0xc) == 0xc))) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = LDS_OP3_LDS_WRITE_REL;
			alu.src[0].sel = temp_reg;
			alu.src[0].chan = i;

			alu.src[1].sel = dst->Register.Index;
			alu.src[1].sel += ctx->file_offset[dst->Register.File];
			alu.src[1].chan = i;

			alu.src[2].sel = dst->Register.Index;
			alu.src[2].sel += ctx->file_offset[dst->Register.File];
			alu.src[2].chan = i + 1;
			alu.lds_idx = 1;
			alu.dst.chan = 0;
			alu.last = 1;
			alu.is_lds_idx_op = true;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
			i += 1;
			continue;
		}
		/* Single-channel write. */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = LDS_OP2_LDS_WRITE;
		alu.src[0].sel = temp_reg;
		alu.src[0].chan = i;

		alu.src[1].sel = dst->Register.Index;
		alu.src[1].sel += ctx->file_offset[dst->Register.File];
		alu.src[1].chan = i;

		alu.src[2].sel = V_SQ_ALU_SRC_0;
		alu.dst.chan = 0;
		alu.last = 1;
		alu.is_lds_idx_op = true;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Read back a TESSINNER/TESSOUTER output (stored per patch in LDS) into
 * its output GPR so it can be fed to the tess-factor writes. */
static int r600_tess_factor_read(struct r600_shader_ctx *ctx,
				 int output_idx)
{
	int param;
	unsigned temp_reg = r600_get_temp(ctx);
	unsigned name = ctx->shader->output[output_idx].name;
	int dreg = ctx->shader->output[output_idx].gpr;
	int r;

	param = r600_get_lds_unique_index(name, 0);
	r = get_lds_offset0(ctx, 1, temp_reg, true);
	if (r)
		return r;

	r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
			   temp_reg, 0,
			   temp_reg, 0,
			   V_SQ_ALU_SRC_LITERAL, param * 16);
	if (r)
		return r;

	/* NOTE(review): return value of do_lds_fetch_values is ignored here,
	 * unlike the other call sites — confirm whether that is intentional. */
	do_lds_fetch_values(ctx, temp_reg, dreg);
	return 0;
}

/* End-of-TCS epilogue: have invocation 0 of the patch write the outer/inner
 * tessellation factors to the TF buffer via GDS TF_WRITE ops. */
static int r600_emit_tess_factor(struct r600_shader_ctx *ctx)
{
	unsigned i;
	int stride, outer_comps, inner_comps;
	int tessinner_idx = -1, tessouter_idx = -1;
	int r;
	int temp_reg = r600_get_temp(ctx);
	/* one temp per pair of (index, value) slots; up to 3 for quads */
	int treg[3] = {-1, -1, -1};
	struct r600_bytecode_alu alu;
	struct r600_bytecode_cf *cf_jump, *cf_pop;

	/* only execute factor emission for invocation 0 */
	/* PRED_SETE_INT __, R0.z, 0 — InvocationID lives in r0.z for TCS
	 * (chan 2 below); src1 literal value is 0 from the memset */
	memset(&alu, 0, sizeof(alu));
	alu.op = ALU_OP2_PRED_SETE_INT;
	alu.src[0].chan = 2;
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.execute_mask = 1;
	alu.update_pred = 1;
	alu.last = 1;
	r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_PUSH_BEFORE);

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);
	cf_jump = ctx->bc->cf_last;

	treg[0] = r600_get_temp(ctx);
	switch (ctx->shader->tcs_prim_mode) {
	case PIPE_PRIM_LINES:
		stride = 8; /* 2 dwords, 1 vec2 store */
		outer_comps = 2;
		inner_comps = 0;
		break;
	case PIPE_PRIM_TRIANGLES:
		stride = 16; /* 4 dwords, 1 vec4 store */
		outer_comps = 3;
		inner_comps = 1;
		treg[1] = r600_get_temp(ctx);
		break;
	case PIPE_PRIM_QUADS:
		stride = 24; /* 6 dwords, 2 stores (vec4 + vec2) */
		outer_comps = 4;
		inner_comps = 2;
		treg[1] = r600_get_temp(ctx);
		treg[2] = r600_get_temp(ctx);
		break;
	default:
		assert(0);
		return -1;
	}

	/* R0 is
	   InvocationID, RelPatchID, PatchID, tf_base */
	/* TF_WRITE takes index in R.x, value in R.y */
	for (i = 0; i < ctx->shader->noutput; i++) {
		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSINNER)
			tessinner_idx = i;
		if (ctx->shader->output[i].name == TGSI_SEMANTIC_TESSOUTER)
			tessouter_idx = i;
	}

	if (tessouter_idx == -1)
		return -1;

	if (tessinner_idx == -1 && inner_comps)
		return -1;

	if (tessouter_idx != -1) {
		r = r600_tess_factor_read(ctx, tessouter_idx);
		if (r)
			return r;
	}

	if (tessinner_idx != -1) {
		r = r600_tess_factor_read(ctx, tessinner_idx);
		if (r)
			return r;
	}

	/* r.x = tf_base(r0.w) + relpatchid(r0.y) * tf_stride */
	/* r.x = relpatchid(r0.y) * tf_stride */

	/* multiply incoming r0.y * stride - t.x = r0.y * stride */
	/* add incoming r0.w to it: t.x = t.x + r0.w */
	r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
			   temp_reg, 0,
			   0, 1,
			   V_SQ_ALU_SRC_LITERAL, stride,
			   0, 3);
	if (r)
		return r;

	/* Pack (TF index, TF value) pairs into treg[] channels:
	 * even channel = buffer index (t.x + 4*i), odd channel = value. */
	for (i = 0; i < outer_comps + inner_comps; i++) {
		int out_idx = i >= outer_comps ? tessinner_idx : tessouter_idx;
		int out_comp = i >= outer_comps ? i - outer_comps : i;

		r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
				   treg[i / 2], (2 * (i % 2)),
				   temp_reg, 0,
				   V_SQ_ALU_SRC_LITERAL, 4 * i);
		if (r)
			return r;
		r = single_alu_op2(ctx, ALU_OP1_MOV,
				   treg[i / 2], 1 + (2 * (i % 2)),
				   ctx->shader->output[out_idx].gpr, out_comp,
				   0, 0);
		if (r)
			return r;
	}
	/* One GDS TF_WRITE per factor component: index from src_sel_x,
	 * value from src_sel_y; all destinations masked (sel 7). */
	for (i = 0; i < outer_comps + inner_comps; i++) {
		struct r600_bytecode_gds gds;

		memset(&gds, 0, sizeof(struct r600_bytecode_gds));
		gds.src_gpr = treg[i / 2];
		gds.src_sel_x = 2 * (i % 2);
		gds.src_sel_y = 1 + (2 * (i % 2));
		gds.src_sel_z = 4;
		gds.dst_sel_x = 7;
		gds.dst_sel_y = 7;
		gds.dst_sel_z = 7;
		gds.dst_sel_w = 7;
		gds.op = FETCH_OP_TF_WRITE;
		r = r600_bytecode_add_gds(ctx->bc, &gds);
		if (r)
			return r;
	}

	// Patch up jump label
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
	cf_pop = ctx->bc->cf_last;

	cf_jump->cf_addr = cf_pop->id + 2;
	cf_jump->pop_count = 1;
	cf_pop->cf_addr = cf_pop->id + 2;
	cf_pop->pop_count = 1;

	return 0;
}

/* Main TGSI -> R600 bytecode translation entry point.
 * (Definition continues past the end of this chunk; only the setup
 * portion is visible here.) */
static int r600_shader_from_tgsi(struct r600_context *rctx,
				 struct r600_pipe_shader *pipeshader,
				 union r600_shader_key key)
{
	struct r600_screen *rscreen = rctx->screen;
	struct r600_shader *shader = &pipeshader->shader;
	struct tgsi_token *tokens = pipeshader->selector->tokens;
	struct pipe_stream_output_info so = pipeshader->selector->so;
	struct tgsi_full_immediate *immediate;
	struct r600_shader_ctx ctx;
	struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
	unsigned output_done, noutput;
	unsigned opcode;
	int i, j, k, r = 0;
	int next_param_base = 0, next_clip_base;
	int max_color_exports = MAX2(key.ps.nr_cbufs, 1);
	bool indirect_gprs;
	bool ring_outputs = false;
	bool lds_outputs = false;
	bool lds_inputs = false;
	bool pos_emitted = false;

	ctx.bc = &shader->bc;
	ctx.shader = shader;
	ctx.native_integers = true;

	r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
			   rscreen->has_compressed_msaa_texturing);
	ctx.tokens = tokens;
	tgsi_scan_shader(tokens, &ctx.info);
	shader->indirect_files = ctx.info.indirect_files;

	shader->uses_doubles = ctx.info.uses_doubles;

	/* Indirect constant/sampler access is handled natively; only other
	 * files force indirect-GPR addressing. */
	indirect_gprs = ctx.info.indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER));
	tgsi_parse_init(&ctx.parse, tokens);
	ctx.type = ctx.info.processor;
	shader->processor_type = ctx.type;
	ctx.bc->type = shader->processor_type;

	/* Per-stage output routing: ring (ES/GS), LDS (LS/TCS), or plain. */
	switch (ctx.type) {
	case PIPE_SHADER_VERTEX:
		shader->vs_as_gs_a = key.vs.as_gs_a;
		shader->vs_as_es = key.vs.as_es;
		shader->vs_as_ls = key.vs.as_ls;
		if (shader->vs_as_es)
			ring_outputs = true;
		if (shader->vs_as_ls)
			lds_outputs = true;
		break;
	case PIPE_SHADER_GEOMETRY:
		ring_outputs = true;
		break;
	case PIPE_SHADER_TESS_CTRL:
		shader->tcs_prim_mode = key.tcs.prim_mode;
		lds_outputs = true;
		lds_inputs = true;
		break;
	case PIPE_SHADER_TESS_EVAL:
		shader->tes_as_es = key.tes.as_es;
		lds_inputs = true;
		if (shader->tes_as_es)
			ring_outputs = true;
		break;
	case PIPE_SHADER_FRAGMENT:
		shader->two_side = key.ps.color_two_side;
		break;
	default:
		break;
	}

	if (shader->vs_as_es || shader->tes_as_es) {
		ctx.gs_for_vs = &rctx->gs_shader->current->shader;
	} else {
		ctx.gs_for_vs = NULL;
	}

	ctx.next_ring_offset = 0;
	ctx.gs_out_ring_offset = 0;
	ctx.gs_next_vertex = 0;
	ctx.gs_stream_output_info = &so;

	ctx.face_gpr = -1;
	ctx.fixed_pt_position_gpr = -1;
	ctx.fragcoord_input = -1;
	ctx.colors_used = 0;
	ctx.clip_vertex_write = 0;

	shader->nr_ps_color_exports = 0;
	shader->nr_ps_max_color_exports = 0;


	/* register allocations */
	/* Values [0,127] correspond to GPR[0..127].
	 * Values [128,159] correspond to constant buffer bank 0
	 * Values [160,191] correspond to constant buffer bank 1
	 * Values [256,511] correspond to cfile constants c[0..255]. (Gone on EG)
	 * Values [256,287] correspond to constant buffer bank 2 (EG)
	 * Values [288,319] correspond to constant buffer bank 3 (EG)
	 * Other special values are shown in the list below.
	 * 244  ALU_SRC_1_DBL_L: special constant 1.0 double-float, LSW. (RV670+)
	 * 245  ALU_SRC_1_DBL_M: special constant 1.0 double-float, MSW. (RV670+)
	 * 246  ALU_SRC_0_5_DBL_L: special constant 0.5 double-float, LSW. (RV670+)
	 * 247  ALU_SRC_0_5_DBL_M: special constant 0.5 double-float, MSW. (RV670+)
	 * 248	SQ_ALU_SRC_0: special constant 0.0.
	 * 249	SQ_ALU_SRC_1: special constant 1.0 float.
	 * 250	SQ_ALU_SRC_1_INT: special constant 1 integer.
	 * 251	SQ_ALU_SRC_M_1_INT: special constant -1 integer.
	 * 252	SQ_ALU_SRC_0_5: special constant 0.5 float.
	 * 253	SQ_ALU_SRC_LITERAL: literal constant.
	 * 254	SQ_ALU_SRC_PV: previous vector result.
	 * 255	SQ_ALU_SRC_PS: previous scalar result.
	 */
	for (i = 0; i < TGSI_FILE_COUNT; i++) {
		ctx.file_offset[i] = 0;
	}

	if (ctx.type == PIPE_SHADER_VERTEX) {
		/* r0 carries VertexID/InstanceID; inputs start at r1 */
		ctx.file_offset[TGSI_FILE_INPUT] = 1;
		r600_bytecode_add_cfinst(ctx.bc, CF_OP_CALL_FS);
	}
	if (ctx.type == PIPE_SHADER_FRAGMENT) {
		if (ctx.bc->chip_class >= EVERGREEN)
			ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
		else
			ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
	}
	if (ctx.type == PIPE_SHADER_GEOMETRY) {
		/* FIXME 1 would be enough in some cases (3 or less input vertices) */
		ctx.file_offset[TGSI_FILE_INPUT] = 2;
	}
	if (ctx.type == PIPE_SHADER_TESS_CTRL)
		ctx.file_offset[TGSI_FILE_INPUT] = 1;
	if (ctx.type == PIPE_SHADER_TESS_EVAL) {
		bool add_tesscoord = false, add_tess_inout = false;
		ctx.file_offset[TGSI_FILE_INPUT] = 1;
		for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
			/* if we have tesscoord save one reg */
			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSCOORD)
				add_tesscoord = true;
			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSINNER ||
			    ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_TESSOUTER)
				add_tess_inout = true;
		}
		if (add_tesscoord || add_tess_inout)
			ctx.file_offset[TGSI_FILE_INPUT]++;
		if (add_tess_inout)
			ctx.file_offset[TGSI_FILE_INPUT] += 2;
	}

	/* Lay out OUTPUT and TEMPORARY register files after the inputs. */
	ctx.file_offset[TGSI_FILE_OUTPUT] =
		ctx.file_offset[TGSI_FILE_INPUT] +
		ctx.info.file_max[TGSI_FILE_INPUT] + 1;
	ctx.file_offset[TGSI_FILE_TEMPORARY] = ctx.file_offset[TGSI_FILE_OUTPUT] +
		ctx.info.file_max[TGSI_FILE_OUTPUT] + 1;

	/* Outside the GPR range. This will be translated to one of the
	 * kcache banks later.
*/ 3075 ctx.file_offset[TGSI_FILE_CONSTANT] = 512; 3076 3077 ctx.file_offset[TGSI_FILE_IMMEDIATE] = V_SQ_ALU_SRC_LITERAL; 3078 ctx.bc->ar_reg = ctx.file_offset[TGSI_FILE_TEMPORARY] + 3079 ctx.info.file_max[TGSI_FILE_TEMPORARY] + 1; 3080 ctx.bc->index_reg[0] = ctx.bc->ar_reg + 1; 3081 ctx.bc->index_reg[1] = ctx.bc->ar_reg + 2; 3082 3083 if (ctx.type == PIPE_SHADER_TESS_CTRL) { 3084 ctx.tess_input_info = ctx.bc->ar_reg + 3; 3085 ctx.tess_output_info = ctx.bc->ar_reg + 4; 3086 ctx.temp_reg = ctx.bc->ar_reg + 5; 3087 } else if (ctx.type == PIPE_SHADER_TESS_EVAL) { 3088 ctx.tess_input_info = 0; 3089 ctx.tess_output_info = ctx.bc->ar_reg + 3; 3090 ctx.temp_reg = ctx.bc->ar_reg + 4; 3091 } else if (ctx.type == PIPE_SHADER_GEOMETRY) { 3092 ctx.gs_export_gpr_tregs[0] = ctx.bc->ar_reg + 3; 3093 ctx.gs_export_gpr_tregs[1] = ctx.bc->ar_reg + 4; 3094 ctx.gs_export_gpr_tregs[2] = ctx.bc->ar_reg + 5; 3095 ctx.gs_export_gpr_tregs[3] = ctx.bc->ar_reg + 6; 3096 ctx.temp_reg = ctx.bc->ar_reg + 7; 3097 } else { 3098 ctx.temp_reg = ctx.bc->ar_reg + 3; 3099 } 3100 3101 shader->max_arrays = 0; 3102 shader->num_arrays = 0; 3103 if (indirect_gprs) { 3104 3105 if (ctx.info.indirect_files & (1 << TGSI_FILE_INPUT)) { 3106 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_INPUT], 3107 ctx.file_offset[TGSI_FILE_OUTPUT] - 3108 ctx.file_offset[TGSI_FILE_INPUT], 3109 0x0F); 3110 } 3111 if (ctx.info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 3112 r600_add_gpr_array(shader, ctx.file_offset[TGSI_FILE_OUTPUT], 3113 ctx.file_offset[TGSI_FILE_TEMPORARY] - 3114 ctx.file_offset[TGSI_FILE_OUTPUT], 3115 0x0F); 3116 } 3117 } 3118 3119 ctx.nliterals = 0; 3120 ctx.literals = NULL; 3121 3122 shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] && 3123 ctx.info.colors_written == 1; 3124 shader->vs_position_window_space = ctx.info.properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; 3125 shader->ps_conservative_z = (uint8_t)ctx.info.properties[TGSI_PROPERTY_FS_DEPTH_LAYOUT]; 
3126 3127 if (shader->vs_as_gs_a) 3128 vs_add_primid_output(&ctx, key.vs.prim_id_out); 3129 3130 if (ctx.type == PIPE_SHADER_TESS_EVAL) 3131 r600_fetch_tess_io_info(&ctx); 3132 3133 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3134 tgsi_parse_token(&ctx.parse); 3135 switch (ctx.parse.FullToken.Token.Type) { 3136 case TGSI_TOKEN_TYPE_IMMEDIATE: 3137 immediate = &ctx.parse.FullToken.FullImmediate; 3138 ctx.literals = realloc(ctx.literals, (ctx.nliterals + 1) * 16); 3139 if(ctx.literals == NULL) { 3140 r = -ENOMEM; 3141 goto out_err; 3142 } 3143 ctx.literals[ctx.nliterals * 4 + 0] = immediate->u[0].Uint; 3144 ctx.literals[ctx.nliterals * 4 + 1] = immediate->u[1].Uint; 3145 ctx.literals[ctx.nliterals * 4 + 2] = immediate->u[2].Uint; 3146 ctx.literals[ctx.nliterals * 4 + 3] = immediate->u[3].Uint; 3147 ctx.nliterals++; 3148 break; 3149 case TGSI_TOKEN_TYPE_DECLARATION: 3150 r = tgsi_declaration(&ctx); 3151 if (r) 3152 goto out_err; 3153 break; 3154 case TGSI_TOKEN_TYPE_INSTRUCTION: 3155 case TGSI_TOKEN_TYPE_PROPERTY: 3156 break; 3157 default: 3158 R600_ERR("unsupported token type %d\n", ctx.parse.FullToken.Token.Type); 3159 r = -EINVAL; 3160 goto out_err; 3161 } 3162 } 3163 3164 shader->ring_item_sizes[0] = ctx.next_ring_offset; 3165 shader->ring_item_sizes[1] = 0; 3166 shader->ring_item_sizes[2] = 0; 3167 shader->ring_item_sizes[3] = 0; 3168 3169 /* Process two side if needed */ 3170 if (shader->two_side && ctx.colors_used) { 3171 int i, count = ctx.shader->ninput; 3172 unsigned next_lds_loc = ctx.shader->nlds; 3173 3174 /* additional inputs will be allocated right after the existing inputs, 3175 * we won't need them after the color selection, so we don't need to 3176 * reserve these gprs for the rest of the shader code and to adjust 3177 * output offsets etc. 
*/ 3178 int gpr = ctx.file_offset[TGSI_FILE_INPUT] + 3179 ctx.info.file_max[TGSI_FILE_INPUT] + 1; 3180 3181 /* if two sided and neither face or sample mask is used by shader, ensure face_gpr is emitted */ 3182 if (ctx.face_gpr == -1) { 3183 i = ctx.shader->ninput++; 3184 ctx.shader->input[i].name = TGSI_SEMANTIC_FACE; 3185 ctx.shader->input[i].spi_sid = 0; 3186 ctx.shader->input[i].gpr = gpr++; 3187 ctx.face_gpr = ctx.shader->input[i].gpr; 3188 } 3189 3190 for (i = 0; i < count; i++) { 3191 if (ctx.shader->input[i].name == TGSI_SEMANTIC_COLOR) { 3192 int ni = ctx.shader->ninput++; 3193 memcpy(&ctx.shader->input[ni],&ctx.shader->input[i], sizeof(struct r600_shader_io)); 3194 ctx.shader->input[ni].name = TGSI_SEMANTIC_BCOLOR; 3195 ctx.shader->input[ni].spi_sid = r600_spi_sid(&ctx.shader->input[ni]); 3196 ctx.shader->input[ni].gpr = gpr++; 3197 // TGSI to LLVM needs to know the lds position of inputs. 3198 // Non LLVM path computes it later (in process_twoside_color) 3199 ctx.shader->input[ni].lds_pos = next_lds_loc++; 3200 ctx.shader->input[i].back_color_input = ni; 3201 if (ctx.bc->chip_class >= EVERGREEN) { 3202 if ((r = evergreen_interp_input(&ctx, ni))) 3203 return r; 3204 } 3205 } 3206 } 3207 } 3208 3209 if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN) 3210 shader->nr_ps_max_color_exports = 8; 3211 3212 if (ctx.fragcoord_input >= 0) { 3213 if (ctx.bc->chip_class == CAYMAN) { 3214 for (j = 0 ; j < 4; j++) { 3215 struct r600_bytecode_alu alu; 3216 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3217 alu.op = ALU_OP1_RECIP_IEEE; 3218 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3219 alu.src[0].chan = 3; 3220 3221 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3222 alu.dst.chan = j; 3223 alu.dst.write = (j == 3); 3224 alu.last = 1; 3225 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3226 return r; 3227 } 3228 } else { 3229 struct r600_bytecode_alu alu; 3230 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3231 alu.op = 
ALU_OP1_RECIP_IEEE; 3232 alu.src[0].sel = shader->input[ctx.fragcoord_input].gpr; 3233 alu.src[0].chan = 3; 3234 3235 alu.dst.sel = shader->input[ctx.fragcoord_input].gpr; 3236 alu.dst.chan = 3; 3237 alu.dst.write = 1; 3238 alu.last = 1; 3239 if ((r = r600_bytecode_add_alu(ctx.bc, &alu))) 3240 return r; 3241 } 3242 } 3243 3244 if (ctx.type == PIPE_SHADER_GEOMETRY) { 3245 struct r600_bytecode_alu alu; 3246 int r; 3247 3248 /* GS thread with no output workaround - emit a cut at start of GS */ 3249 if (ctx.bc->chip_class == R600) 3250 r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX); 3251 3252 for (j = 0; j < 4; j++) { 3253 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3254 alu.op = ALU_OP1_MOV; 3255 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 3256 alu.src[0].value = 0; 3257 alu.dst.sel = ctx.gs_export_gpr_tregs[j]; 3258 alu.dst.write = 1; 3259 alu.last = 1; 3260 r = r600_bytecode_add_alu(ctx.bc, &alu); 3261 if (r) 3262 return r; 3263 } 3264 } 3265 3266 if (ctx.type == PIPE_SHADER_TESS_CTRL) 3267 r600_fetch_tess_io_info(&ctx); 3268 3269 if (shader->two_side && ctx.colors_used) { 3270 if ((r = process_twoside_color_inputs(&ctx))) 3271 return r; 3272 } 3273 3274 tgsi_parse_init(&ctx.parse, tokens); 3275 while (!tgsi_parse_end_of_tokens(&ctx.parse)) { 3276 tgsi_parse_token(&ctx.parse); 3277 switch (ctx.parse.FullToken.Token.Type) { 3278 case TGSI_TOKEN_TYPE_INSTRUCTION: 3279 r = tgsi_is_supported(&ctx); 3280 if (r) 3281 goto out_err; 3282 ctx.max_driver_temp_used = 0; 3283 /* reserve first tmp for everyone */ 3284 r600_get_temp(&ctx); 3285 3286 opcode = ctx.parse.FullToken.FullInstruction.Instruction.Opcode; 3287 if ((r = tgsi_split_constant(&ctx))) 3288 goto out_err; 3289 if ((r = tgsi_split_literal_constant(&ctx))) 3290 goto out_err; 3291 if (ctx.type == PIPE_SHADER_GEOMETRY) { 3292 if ((r = tgsi_split_gs_inputs(&ctx))) 3293 goto out_err; 3294 } else if (lds_inputs) { 3295 if ((r = tgsi_split_lds_inputs(&ctx))) 3296 goto out_err; 3297 } 3298 if (ctx.bc->chip_class == 
CAYMAN) 3299 ctx.inst_info = &cm_shader_tgsi_instruction[opcode]; 3300 else if (ctx.bc->chip_class >= EVERGREEN) 3301 ctx.inst_info = &eg_shader_tgsi_instruction[opcode]; 3302 else 3303 ctx.inst_info = &r600_shader_tgsi_instruction[opcode]; 3304 r = ctx.inst_info->process(&ctx); 3305 if (r) 3306 goto out_err; 3307 3308 if (ctx.type == PIPE_SHADER_TESS_CTRL) { 3309 r = r600_store_tcs_output(&ctx); 3310 if (r) 3311 goto out_err; 3312 } 3313 break; 3314 default: 3315 break; 3316 } 3317 } 3318 3319 /* Reset the temporary register counter. */ 3320 ctx.max_driver_temp_used = 0; 3321 3322 noutput = shader->noutput; 3323 3324 if (!ring_outputs && ctx.clip_vertex_write) { 3325 unsigned clipdist_temp[2]; 3326 3327 clipdist_temp[0] = r600_get_temp(&ctx); 3328 clipdist_temp[1] = r600_get_temp(&ctx); 3329 3330 /* need to convert a clipvertex write into clipdistance writes and not export 3331 the clip vertex anymore */ 3332 3333 memset(&shader->output[noutput], 0, 2*sizeof(struct r600_shader_io)); 3334 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3335 shader->output[noutput].gpr = clipdist_temp[0]; 3336 noutput++; 3337 shader->output[noutput].name = TGSI_SEMANTIC_CLIPDIST; 3338 shader->output[noutput].gpr = clipdist_temp[1]; 3339 noutput++; 3340 3341 /* reset spi_sid for clipvertex output to avoid confusing spi */ 3342 shader->output[ctx.cv_output].spi_sid = 0; 3343 3344 shader->clip_dist_write = 0xFF; 3345 3346 for (i = 0; i < 8; i++) { 3347 int oreg = i >> 2; 3348 int ochan = i & 3; 3349 3350 for (j = 0; j < 4; j++) { 3351 struct r600_bytecode_alu alu; 3352 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3353 alu.op = ALU_OP2_DOT4; 3354 alu.src[0].sel = shader->output[ctx.cv_output].gpr; 3355 alu.src[0].chan = j; 3356 3357 alu.src[1].sel = 512 + i; 3358 alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 3359 alu.src[1].chan = j; 3360 3361 alu.dst.sel = clipdist_temp[oreg]; 3362 alu.dst.chan = j; 3363 alu.dst.write = (j == ochan); 3364 if (j == 3) 3365 alu.last = 
1; 3366 r = r600_bytecode_add_alu(ctx.bc, &alu); 3367 if (r) 3368 return r; 3369 } 3370 } 3371 } 3372 3373 /* Add stream outputs. */ 3374 if (so.num_outputs) { 3375 bool emit = false; 3376 if (!lds_outputs && !ring_outputs && ctx.type == PIPE_SHADER_VERTEX) 3377 emit = true; 3378 if (!ring_outputs && ctx.type == PIPE_SHADER_TESS_EVAL) 3379 emit = true; 3380 if (emit) 3381 emit_streamout(&ctx, &so, -1, NULL); 3382 } 3383 pipeshader->enabled_stream_buffers_mask = ctx.enabled_stream_buffers_mask; 3384 convert_edgeflag_to_int(&ctx); 3385 3386 if (ctx.type == PIPE_SHADER_TESS_CTRL) 3387 r600_emit_tess_factor(&ctx); 3388 3389 if (lds_outputs) { 3390 if (ctx.type == PIPE_SHADER_VERTEX) { 3391 if (ctx.shader->noutput) 3392 emit_lds_vs_writes(&ctx); 3393 } 3394 } else if (ring_outputs) { 3395 if (shader->vs_as_es || shader->tes_as_es) { 3396 ctx.gs_export_gpr_tregs[0] = r600_get_temp(&ctx); 3397 ctx.gs_export_gpr_tregs[1] = -1; 3398 ctx.gs_export_gpr_tregs[2] = -1; 3399 ctx.gs_export_gpr_tregs[3] = -1; 3400 3401 emit_gs_ring_writes(&ctx, &so, -1, FALSE); 3402 } 3403 } else { 3404 /* Export output */ 3405 next_clip_base = shader->vs_out_misc_write ? 
62 : 61; 3406 3407 for (i = 0, j = 0; i < noutput; i++, j++) { 3408 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3409 output[j].gpr = shader->output[i].gpr; 3410 output[j].elem_size = 3; 3411 output[j].swizzle_x = 0; 3412 output[j].swizzle_y = 1; 3413 output[j].swizzle_z = 2; 3414 output[j].swizzle_w = 3; 3415 output[j].burst_count = 1; 3416 output[j].type = -1; 3417 output[j].op = CF_OP_EXPORT; 3418 switch (ctx.type) { 3419 case PIPE_SHADER_VERTEX: 3420 case PIPE_SHADER_TESS_EVAL: 3421 switch (shader->output[i].name) { 3422 case TGSI_SEMANTIC_POSITION: 3423 output[j].array_base = 60; 3424 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3425 pos_emitted = true; 3426 break; 3427 3428 case TGSI_SEMANTIC_PSIZE: 3429 output[j].array_base = 61; 3430 output[j].swizzle_y = 7; 3431 output[j].swizzle_z = 7; 3432 output[j].swizzle_w = 7; 3433 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3434 pos_emitted = true; 3435 break; 3436 case TGSI_SEMANTIC_EDGEFLAG: 3437 output[j].array_base = 61; 3438 output[j].swizzle_x = 7; 3439 output[j].swizzle_y = 0; 3440 output[j].swizzle_z = 7; 3441 output[j].swizzle_w = 7; 3442 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3443 pos_emitted = true; 3444 break; 3445 case TGSI_SEMANTIC_LAYER: 3446 /* spi_sid is 0 for outputs that are 3447 * not consumed by PS */ 3448 if (shader->output[i].spi_sid) { 3449 output[j].array_base = next_param_base++; 3450 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3451 j++; 3452 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3453 } 3454 output[j].array_base = 61; 3455 output[j].swizzle_x = 7; 3456 output[j].swizzle_y = 7; 3457 output[j].swizzle_z = 0; 3458 output[j].swizzle_w = 7; 3459 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3460 pos_emitted = true; 3461 break; 3462 case TGSI_SEMANTIC_VIEWPORT_INDEX: 3463 /* spi_sid is 0 for outputs that are 3464 * not consumed by PS */ 3465 if (shader->output[i].spi_sid) 
{ 3466 output[j].array_base = next_param_base++; 3467 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3468 j++; 3469 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3470 } 3471 output[j].array_base = 61; 3472 output[j].swizzle_x = 7; 3473 output[j].swizzle_y = 7; 3474 output[j].swizzle_z = 7; 3475 output[j].swizzle_w = 0; 3476 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3477 pos_emitted = true; 3478 break; 3479 case TGSI_SEMANTIC_CLIPVERTEX: 3480 j--; 3481 break; 3482 case TGSI_SEMANTIC_CLIPDIST: 3483 output[j].array_base = next_clip_base++; 3484 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3485 pos_emitted = true; 3486 /* spi_sid is 0 for clipdistance outputs that were generated 3487 * for clipvertex - we don't need to pass them to PS */ 3488 if (shader->output[i].spi_sid) { 3489 j++; 3490 /* duplicate it as PARAM to pass to the pixel shader */ 3491 memcpy(&output[j], &output[j-1], sizeof(struct r600_bytecode_output)); 3492 output[j].array_base = next_param_base++; 3493 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3494 } 3495 break; 3496 case TGSI_SEMANTIC_FOG: 3497 output[j].swizzle_y = 4; /* 0 */ 3498 output[j].swizzle_z = 4; /* 0 */ 3499 output[j].swizzle_w = 5; /* 1 */ 3500 break; 3501 case TGSI_SEMANTIC_PRIMID: 3502 output[j].swizzle_x = 2; 3503 output[j].swizzle_y = 4; /* 0 */ 3504 output[j].swizzle_z = 4; /* 0 */ 3505 output[j].swizzle_w = 4; /* 0 */ 3506 break; 3507 } 3508 3509 break; 3510 case PIPE_SHADER_FRAGMENT: 3511 if (shader->output[i].name == TGSI_SEMANTIC_COLOR) { 3512 /* never export more colors than the number of CBs */ 3513 if (shader->output[i].sid >= max_color_exports) { 3514 /* skip export */ 3515 j--; 3516 continue; 3517 } 3518 output[j].swizzle_w = key.ps.alpha_to_one ? 
5 : 3; 3519 output[j].array_base = shader->output[i].sid; 3520 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3521 shader->nr_ps_color_exports++; 3522 if (shader->fs_write_all && (rscreen->b.chip_class >= EVERGREEN)) { 3523 for (k = 1; k < max_color_exports; k++) { 3524 j++; 3525 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3526 output[j].gpr = shader->output[i].gpr; 3527 output[j].elem_size = 3; 3528 output[j].swizzle_x = 0; 3529 output[j].swizzle_y = 1; 3530 output[j].swizzle_z = 2; 3531 output[j].swizzle_w = key.ps.alpha_to_one ? 5 : 3; 3532 output[j].burst_count = 1; 3533 output[j].array_base = k; 3534 output[j].op = CF_OP_EXPORT; 3535 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3536 shader->nr_ps_color_exports++; 3537 } 3538 } 3539 } else if (shader->output[i].name == TGSI_SEMANTIC_POSITION) { 3540 output[j].array_base = 61; 3541 output[j].swizzle_x = 2; 3542 output[j].swizzle_y = 7; 3543 output[j].swizzle_z = output[j].swizzle_w = 7; 3544 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3545 } else if (shader->output[i].name == TGSI_SEMANTIC_STENCIL) { 3546 output[j].array_base = 61; 3547 output[j].swizzle_x = 7; 3548 output[j].swizzle_y = 1; 3549 output[j].swizzle_z = output[j].swizzle_w = 7; 3550 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3551 } else if (shader->output[i].name == TGSI_SEMANTIC_SAMPLEMASK) { 3552 output[j].array_base = 61; 3553 output[j].swizzle_x = 7; 3554 output[j].swizzle_y = 7; 3555 output[j].swizzle_z = 0; 3556 output[j].swizzle_w = 7; 3557 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3558 } else { 3559 R600_ERR("unsupported fragment output name %d\n", shader->output[i].name); 3560 r = -EINVAL; 3561 goto out_err; 3562 } 3563 break; 3564 case PIPE_SHADER_TESS_CTRL: 3565 break; 3566 default: 3567 R600_ERR("unsupported processor type %d\n", ctx.type); 3568 r = -EINVAL; 3569 goto out_err; 3570 } 3571 3572 if (output[j].type==-1) { 3573 output[j].type 
= V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3574 output[j].array_base = next_param_base++; 3575 } 3576 } 3577 3578 /* add fake position export */ 3579 if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && pos_emitted == false) { 3580 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3581 output[j].gpr = 0; 3582 output[j].elem_size = 3; 3583 output[j].swizzle_x = 7; 3584 output[j].swizzle_y = 7; 3585 output[j].swizzle_z = 7; 3586 output[j].swizzle_w = 7; 3587 output[j].burst_count = 1; 3588 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_POS; 3589 output[j].array_base = 60; 3590 output[j].op = CF_OP_EXPORT; 3591 j++; 3592 } 3593 3594 /* add fake param output for vertex shader if no param is exported */ 3595 if ((ctx.type == PIPE_SHADER_VERTEX || ctx.type == PIPE_SHADER_TESS_EVAL) && next_param_base == 0) { 3596 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3597 output[j].gpr = 0; 3598 output[j].elem_size = 3; 3599 output[j].swizzle_x = 7; 3600 output[j].swizzle_y = 7; 3601 output[j].swizzle_z = 7; 3602 output[j].swizzle_w = 7; 3603 output[j].burst_count = 1; 3604 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PARAM; 3605 output[j].array_base = 0; 3606 output[j].op = CF_OP_EXPORT; 3607 j++; 3608 } 3609 3610 /* add fake pixel export */ 3611 if (ctx.type == PIPE_SHADER_FRAGMENT && shader->nr_ps_color_exports == 0) { 3612 memset(&output[j], 0, sizeof(struct r600_bytecode_output)); 3613 output[j].gpr = 0; 3614 output[j].elem_size = 3; 3615 output[j].swizzle_x = 7; 3616 output[j].swizzle_y = 7; 3617 output[j].swizzle_z = 7; 3618 output[j].swizzle_w = 7; 3619 output[j].burst_count = 1; 3620 output[j].type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_PIXEL; 3621 output[j].array_base = 0; 3622 output[j].op = CF_OP_EXPORT; 3623 j++; 3624 shader->nr_ps_color_exports++; 3625 } 3626 3627 noutput = j; 3628 3629 /* set export done on last export of each type */ 3630 for (i = noutput - 1, output_done = 0; i >= 0; i--) { 3631 
if (!(output_done & (1 << output[i].type))) { 3632 output_done |= (1 << output[i].type); 3633 output[i].op = CF_OP_EXPORT_DONE; 3634 } 3635 } 3636 /* add output to bytecode */ 3637 for (i = 0; i < noutput; i++) { 3638 r = r600_bytecode_add_output(ctx.bc, &output[i]); 3639 if (r) 3640 goto out_err; 3641 } 3642 } 3643 3644 /* add program end */ 3645 if (ctx.bc->chip_class == CAYMAN) 3646 cm_bytecode_add_cf_end(ctx.bc); 3647 else { 3648 const struct cf_op_info *last = NULL; 3649 3650 if (ctx.bc->cf_last) 3651 last = r600_isa_cf(ctx.bc->cf_last->op); 3652 3653 /* alu clause instructions don't have EOP bit, so add NOP */ 3654 if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS) 3655 r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP); 3656 3657 ctx.bc->cf_last->end_of_program = 1; 3658 } 3659 3660 /* check GPR limit - we have 124 = 128 - 4 3661 * (4 are reserved as alu clause temporary registers) */ 3662 if (ctx.bc->ngpr > 124) { 3663 R600_ERR("GPR limit exceeded - shader requires %d registers\n", ctx.bc->ngpr); 3664 r = -ENOMEM; 3665 goto out_err; 3666 } 3667 3668 if (ctx.type == PIPE_SHADER_GEOMETRY) { 3669 if ((r = generate_gs_copy_shader(rctx, pipeshader, &so))) 3670 return r; 3671 } 3672 3673 free(ctx.literals); 3674 tgsi_parse_free(&ctx.parse); 3675 return 0; 3676 out_err: 3677 free(ctx.literals); 3678 tgsi_parse_free(&ctx.parse); 3679 return r; 3680 } 3681 3682 static int tgsi_unsupported(struct r600_shader_ctx *ctx) 3683 { 3684 const unsigned tgsi_opcode = 3685 ctx->parse.FullToken.FullInstruction.Instruction.Opcode; 3686 R600_ERR("%s tgsi opcode unsupported\n", 3687 tgsi_get_opcode_name(tgsi_opcode)); 3688 return -EINVAL; 3689 } 3690 3691 static int tgsi_end(struct r600_shader_ctx *ctx) 3692 { 3693 return 0; 3694 } 3695 3696 static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, 3697 const struct r600_shader_src 
*shader_src, 3698 unsigned chan) 3699 { 3700 bc_src->sel = shader_src->sel; 3701 bc_src->chan = shader_src->swizzle[chan]; 3702 bc_src->neg = shader_src->neg; 3703 bc_src->abs = shader_src->abs; 3704 bc_src->rel = shader_src->rel; 3705 bc_src->value = shader_src->value[bc_src->chan]; 3706 bc_src->kc_bank = shader_src->kc_bank; 3707 bc_src->kc_rel = shader_src->kc_rel; 3708 } 3709 3710 static void r600_bytecode_src_set_abs(struct r600_bytecode_alu_src *bc_src) 3711 { 3712 bc_src->abs = 1; 3713 bc_src->neg = 0; 3714 } 3715 3716 static void r600_bytecode_src_toggle_neg(struct r600_bytecode_alu_src *bc_src) 3717 { 3718 bc_src->neg = !bc_src->neg; 3719 } 3720 3721 static void tgsi_dst(struct r600_shader_ctx *ctx, 3722 const struct tgsi_full_dst_register *tgsi_dst, 3723 unsigned swizzle, 3724 struct r600_bytecode_alu_dst *r600_dst) 3725 { 3726 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3727 3728 r600_dst->sel = tgsi_dst->Register.Index; 3729 r600_dst->sel += ctx->file_offset[tgsi_dst->Register.File]; 3730 r600_dst->chan = swizzle; 3731 r600_dst->write = 1; 3732 if (inst->Instruction.Saturate) { 3733 r600_dst->clamp = 1; 3734 } 3735 if (ctx->type == PIPE_SHADER_TESS_CTRL) { 3736 if (tgsi_dst->Register.File == TGSI_FILE_OUTPUT) { 3737 return; 3738 } 3739 } 3740 if (tgsi_dst->Register.Indirect) 3741 r600_dst->rel = V_SQ_REL_RELATIVE; 3742 3743 } 3744 3745 static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap) 3746 { 3747 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3748 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3749 struct r600_bytecode_alu alu; 3750 int i, j, r, lasti = tgsi_last_instruction(write_mask); 3751 int use_tmp = 0; 3752 3753 if (singledest) { 3754 switch (write_mask) { 3755 case 0x1: 3756 write_mask = 0x3; 3757 break; 3758 case 0x2: 3759 use_tmp = 1; 3760 write_mask = 0x3; 3761 break; 3762 case 0x4: 3763 write_mask = 0xc; 3764 break; 3765 case 0x8: 3766 
/* Emit a two-source 64-bit (double) ALU operation.
 *
 * A double occupies a channel pair (xy and/or zw), so each enabled half
 * of the TGSI writemask expands to two hardware channels.
 *
 * singledest: the double op produces one logical result per pair (e.g. a
 *             conversion); the writemask is widened to the containing
 *             pair, and if the requested dst channel is the high half of
 *             a pair the result is staged in temp_reg and moved into
 *             place afterwards (use_tmp - 1 is the temp channel holding
 *             the result).
 * swap:       emit with src0/src1 exchanged (reversed operand order).
 */
static int tgsi_op2_64_params(struct r600_shader_ctx *ctx, bool singledest, bool swap)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	struct r600_bytecode_alu alu;
	int i, j, r, lasti = tgsi_last_instruction(write_mask);
	int use_tmp = 0;

	if (singledest) {
		/* Widen a single-channel mask to its channel pair. */
		switch (write_mask) {
		case 0x1:
			write_mask = 0x3;
			break;
		case 0x2:
			/* high half of xy pair requested: stage in temp chan 0 */
			use_tmp = 1;
			write_mask = 0x3;
			break;
		case 0x4:
			write_mask = 0xc;
			break;
		case 0x8:
			/* high half of zw pair requested: stage in temp chan 2 */
			write_mask = 0xc;
			use_tmp = 3;
			break;
		}
	}

	lasti = tgsi_last_instruction(write_mask);
	for (i = 0; i <= lasti; i++) {

		if (!(write_mask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		if (singledest) {
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			if (use_tmp) {
				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				alu.dst.write = 1;
			}
			/* only the low channel of each pair carries the
			 * single result; suppress the high-half write */
			if (i == 1 || i == 3)
				alu.dst.write = 0;
		} else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ctx->inst_info->op;
		if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DABS) {
			/* DABS reads the source channel directly (no LSW/MSW
			 * swap); the ABS modifier is applied below on the
			 * MSW half, which carries the sign bit */
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		} else if (!swap) {
			for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
				/* fp64_switch() presumably swaps the LSW/MSW
				 * source channels of the pair -- TODO confirm
				 * against its definition */
				r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i));
			}
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[1], fp64_switch(i));
			r600_bytecode_src(&alu.src[1], &ctx->src[0], fp64_switch(i));
		}

		/* handle some special cases */
		if (i == 1 || i == 3) {
			switch (ctx->parse.FullToken.FullInstruction.Instruction.Opcode) {
			case TGSI_OPCODE_DABS:
				r600_bytecode_src_set_abs(&alu.src[0]);
				break;
			default:
				break;
			}
		}
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	if (use_tmp) {
		/* restore the caller's original (unwidened) mask */
		write_mask = inst->Dst[0].Register.WriteMask;

		/* move result from temp to dst */
		for (i = 0; i <= lasti; i++) {
			if (!(write_mask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = use_tmp - 1;
			alu.last = (i == lasti);

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}
	return 0;
}
return 0; 3843 } 3844 3845 static int tgsi_op2_64(struct r600_shader_ctx *ctx) 3846 { 3847 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3848 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3849 /* confirm writemasking */ 3850 if ((write_mask & 0x3) != 0x3 && 3851 (write_mask & 0xc) != 0xc) { 3852 fprintf(stderr, "illegal writemask for 64-bit: 0x%x\n", write_mask); 3853 return -1; 3854 } 3855 return tgsi_op2_64_params(ctx, false, false); 3856 } 3857 3858 static int tgsi_op2_64_single_dest(struct r600_shader_ctx *ctx) 3859 { 3860 return tgsi_op2_64_params(ctx, true, false); 3861 } 3862 3863 static int tgsi_op2_64_single_dest_s(struct r600_shader_ctx *ctx) 3864 { 3865 return tgsi_op2_64_params(ctx, true, true); 3866 } 3867 3868 static int tgsi_op3_64(struct r600_shader_ctx *ctx) 3869 { 3870 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3871 struct r600_bytecode_alu alu; 3872 int i, j, r; 3873 int lasti = 3; 3874 int tmp = r600_get_temp(ctx); 3875 3876 for (i = 0; i < lasti + 1; i++) { 3877 3878 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3879 alu.op = ctx->inst_info->op; 3880 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3881 r600_bytecode_src(&alu.src[j], &ctx->src[j], i == 3 ? 
0 : 1); 3882 } 3883 3884 if (inst->Dst[0].Register.WriteMask & (1 << i)) 3885 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3886 else 3887 alu.dst.sel = tmp; 3888 3889 alu.dst.chan = i; 3890 alu.is_op3 = 1; 3891 if (i == lasti) { 3892 alu.last = 1; 3893 } 3894 r = r600_bytecode_add_alu(ctx->bc, &alu); 3895 if (r) 3896 return r; 3897 } 3898 return 0; 3899 } 3900 3901 static int tgsi_op2_s(struct r600_shader_ctx *ctx, int swap, int trans_only) 3902 { 3903 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3904 struct r600_bytecode_alu alu; 3905 unsigned write_mask = inst->Dst[0].Register.WriteMask; 3906 int i, j, r, lasti = tgsi_last_instruction(write_mask); 3907 /* use temp register if trans_only and more than one dst component */ 3908 int use_tmp = trans_only && (write_mask ^ (1 << lasti)); 3909 3910 for (i = 0; i <= lasti; i++) { 3911 if (!(write_mask & (1 << i))) 3912 continue; 3913 3914 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3915 if (use_tmp) { 3916 alu.dst.sel = ctx->temp_reg; 3917 alu.dst.chan = i; 3918 alu.dst.write = 1; 3919 } else 3920 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3921 3922 alu.op = ctx->inst_info->op; 3923 if (!swap) { 3924 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 3925 r600_bytecode_src(&alu.src[j], &ctx->src[j], i); 3926 } 3927 } else { 3928 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 3929 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3930 } 3931 if (i == lasti || trans_only) { 3932 alu.last = 1; 3933 } 3934 r = r600_bytecode_add_alu(ctx->bc, &alu); 3935 if (r) 3936 return r; 3937 } 3938 3939 if (use_tmp) { 3940 /* move result from temp to dst */ 3941 for (i = 0; i <= lasti; i++) { 3942 if (!(write_mask & (1 << i))) 3943 continue; 3944 3945 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3946 alu.op = ALU_OP1_MOV; 3947 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3948 alu.src[0].sel = ctx->temp_reg; 3949 alu.src[0].chan = i; 3950 alu.last = (i == lasti); 3951 3952 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 3953 if (r) 3954 return r; 3955 } 3956 } 3957 return 0; 3958 } 3959 3960 static int tgsi_op2(struct r600_shader_ctx *ctx) 3961 { 3962 return tgsi_op2_s(ctx, 0, 0); 3963 } 3964 3965 static int tgsi_op2_swap(struct r600_shader_ctx *ctx) 3966 { 3967 return tgsi_op2_s(ctx, 1, 0); 3968 } 3969 3970 static int tgsi_op2_trans(struct r600_shader_ctx *ctx) 3971 { 3972 return tgsi_op2_s(ctx, 0, 1); 3973 } 3974 3975 static int tgsi_ineg(struct r600_shader_ctx *ctx) 3976 { 3977 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 3978 struct r600_bytecode_alu alu; 3979 int i, r; 3980 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 3981 3982 for (i = 0; i < lasti + 1; i++) { 3983 3984 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 3985 continue; 3986 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 3987 alu.op = ctx->inst_info->op; 3988 3989 alu.src[0].sel = V_SQ_ALU_SRC_0; 3990 3991 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 3992 3993 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 3994 3995 if (i == lasti) { 3996 alu.last = 1; 3997 } 3998 r = r600_bytecode_add_alu(ctx->bc, &alu); 3999 if (r) 4000 return r; 4001 } 4002 return 0; 4003 4004 } 4005 4006 static int tgsi_dneg(struct r600_shader_ctx *ctx) 4007 { 4008 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4009 struct r600_bytecode_alu alu; 4010 int i, r; 4011 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4012 4013 for (i = 0; i < lasti + 1; i++) { 4014 4015 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4016 continue; 4017 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4018 alu.op = ALU_OP1_MOV; 4019 4020 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4021 4022 if (i == 1 || i == 3) 4023 r600_bytecode_src_toggle_neg(&alu.src[0]); 4024 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4025 4026 if (i == lasti) { 4027 alu.last = 1; 4028 } 4029 r = r600_bytecode_add_alu(ctx->bc, &alu); 
4030 if (r) 4031 return r; 4032 } 4033 return 0; 4034 4035 } 4036 4037 static int tgsi_dfracexp(struct r600_shader_ctx *ctx) 4038 { 4039 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4040 struct r600_bytecode_alu alu; 4041 unsigned write_mask = inst->Dst[0].Register.WriteMask; 4042 int i, j, r; 4043 int firsti = write_mask == 0xc ? 2 : 0; 4044 4045 for (i = 0; i <= 3; i++) { 4046 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4047 alu.op = ctx->inst_info->op; 4048 4049 alu.dst.sel = ctx->temp_reg; 4050 alu.dst.chan = i; 4051 alu.dst.write = 1; 4052 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4053 r600_bytecode_src(&alu.src[j], &ctx->src[j], fp64_switch(i)); 4054 } 4055 4056 if (i == 3) 4057 alu.last = 1; 4058 4059 r = r600_bytecode_add_alu(ctx->bc, &alu); 4060 if (r) 4061 return r; 4062 } 4063 4064 /* MOV first two channels to writemask dst0 */ 4065 for (i = 0; i <= 1; i++) { 4066 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4067 alu.op = ALU_OP1_MOV; 4068 alu.src[0].chan = i + 2; 4069 alu.src[0].sel = ctx->temp_reg; 4070 4071 tgsi_dst(ctx, &inst->Dst[0], firsti + i, &alu.dst); 4072 alu.dst.write = (inst->Dst[0].Register.WriteMask >> (firsti + i)) & 1; 4073 alu.last = 1; 4074 r = r600_bytecode_add_alu(ctx->bc, &alu); 4075 if (r) 4076 return r; 4077 } 4078 4079 for (i = 0; i <= 3; i++) { 4080 if (inst->Dst[1].Register.WriteMask & (1 << i)) { 4081 /* MOV third channels to writemask dst1 */ 4082 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4083 alu.op = ALU_OP1_MOV; 4084 alu.src[0].chan = 1; 4085 alu.src[0].sel = ctx->temp_reg; 4086 4087 tgsi_dst(ctx, &inst->Dst[1], i, &alu.dst); 4088 alu.last = 1; 4089 r = r600_bytecode_add_alu(ctx->bc, &alu); 4090 if (r) 4091 return r; 4092 break; 4093 } 4094 } 4095 return 0; 4096 } 4097 4098 4099 static int egcm_int_to_double(struct r600_shader_ctx *ctx) 4100 { 4101 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4102 struct r600_bytecode_alu alu; 
4103 int i, r; 4104 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4105 4106 assert(inst->Instruction.Opcode == TGSI_OPCODE_I2D || 4107 inst->Instruction.Opcode == TGSI_OPCODE_U2D); 4108 4109 for (i = 0; i <= (lasti+1)/2; i++) { 4110 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4111 alu.op = ctx->inst_info->op; 4112 4113 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 4114 alu.dst.sel = ctx->temp_reg; 4115 alu.dst.chan = i; 4116 alu.dst.write = 1; 4117 alu.last = 1; 4118 4119 r = r600_bytecode_add_alu(ctx->bc, &alu); 4120 if (r) 4121 return r; 4122 } 4123 4124 for (i = 0; i <= lasti; i++) { 4125 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4126 alu.op = ALU_OP1_FLT32_TO_FLT64; 4127 4128 alu.src[0].chan = i/2; 4129 if (i%2 == 0) 4130 alu.src[0].sel = ctx->temp_reg; 4131 else { 4132 alu.src[0].sel = V_SQ_ALU_SRC_LITERAL; 4133 alu.src[0].value = 0x0; 4134 } 4135 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4136 alu.last = i == lasti; 4137 4138 r = r600_bytecode_add_alu(ctx->bc, &alu); 4139 if (r) 4140 return r; 4141 } 4142 4143 return 0; 4144 } 4145 4146 static int egcm_double_to_int(struct r600_shader_ctx *ctx) 4147 { 4148 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4149 struct r600_bytecode_alu alu; 4150 int i, r; 4151 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4152 4153 assert(inst->Instruction.Opcode == TGSI_OPCODE_D2I || 4154 inst->Instruction.Opcode == TGSI_OPCODE_D2U); 4155 4156 for (i = 0; i <= lasti; i++) { 4157 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4158 alu.op = ALU_OP1_FLT64_TO_FLT32; 4159 4160 r600_bytecode_src(&alu.src[0], &ctx->src[0], fp64_switch(i)); 4161 alu.dst.chan = i; 4162 alu.dst.sel = ctx->temp_reg; 4163 alu.dst.write = i%2 == 0; 4164 alu.last = i == lasti; 4165 4166 r = r600_bytecode_add_alu(ctx->bc, &alu); 4167 if (r) 4168 return r; 4169 } 4170 4171 for (i = 0; i <= (lasti+1)/2; i++) { 4172 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 
4173 alu.op = ctx->inst_info->op; 4174 4175 alu.src[0].chan = i*2; 4176 alu.src[0].sel = ctx->temp_reg; 4177 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4178 alu.last = 1; 4179 4180 r = r600_bytecode_add_alu(ctx->bc, &alu); 4181 if (r) 4182 return r; 4183 } 4184 4185 return 0; 4186 } 4187 4188 static int cayman_emit_unary_double_raw(struct r600_bytecode *bc, 4189 unsigned op, 4190 int dst_reg, 4191 struct r600_shader_src *src, 4192 bool abs) 4193 { 4194 struct r600_bytecode_alu alu; 4195 const int last_slot = 3; 4196 int r; 4197 4198 /* these have to write the result to X/Y by the looks of it */ 4199 for (int i = 0 ; i < last_slot; i++) { 4200 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4201 alu.op = op; 4202 4203 r600_bytecode_src(&alu.src[0], src, 1); 4204 r600_bytecode_src(&alu.src[1], src, 0); 4205 4206 if (abs) 4207 r600_bytecode_src_set_abs(&alu.src[1]); 4208 4209 alu.dst.sel = dst_reg; 4210 alu.dst.chan = i; 4211 alu.dst.write = (i == 0 || i == 1); 4212 4213 if (bc->chip_class != CAYMAN || i == last_slot - 1) 4214 alu.last = 1; 4215 r = r600_bytecode_add_alu(bc, &alu); 4216 if (r) 4217 return r; 4218 } 4219 4220 return 0; 4221 } 4222 4223 static int cayman_emit_double_instr(struct r600_shader_ctx *ctx) 4224 { 4225 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4226 int i, r; 4227 struct r600_bytecode_alu alu; 4228 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4229 int t1 = ctx->temp_reg; 4230 4231 /* should only be one src regs */ 4232 assert(inst->Instruction.NumSrcRegs == 1); 4233 4234 /* only support one double at a time */ 4235 assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || 4236 inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); 4237 4238 r = cayman_emit_unary_double_raw( 4239 ctx->bc, ctx->inst_info->op, t1, 4240 &ctx->src[0], 4241 ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ || 4242 ctx->parse.FullToken.FullInstruction.Instruction.Opcode == 
TGSI_OPCODE_DSQRT); 4243 if (r) 4244 return r; 4245 4246 for (i = 0 ; i <= lasti; i++) { 4247 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4248 continue; 4249 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4250 alu.op = ALU_OP1_MOV; 4251 alu.src[0].sel = t1; 4252 alu.src[0].chan = (i == 0 || i == 2) ? 0 : 1; 4253 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4254 alu.dst.write = 1; 4255 if (i == lasti) 4256 alu.last = 1; 4257 r = r600_bytecode_add_alu(ctx->bc, &alu); 4258 if (r) 4259 return r; 4260 } 4261 return 0; 4262 } 4263 4264 static int cayman_emit_float_instr(struct r600_shader_ctx *ctx) 4265 { 4266 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4267 int i, j, r; 4268 struct r600_bytecode_alu alu; 4269 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4270 4271 for (i = 0 ; i < last_slot; i++) { 4272 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4273 alu.op = ctx->inst_info->op; 4274 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4275 r600_bytecode_src(&alu.src[j], &ctx->src[j], 0); 4276 4277 /* RSQ should take the absolute value of src */ 4278 if (inst->Instruction.Opcode == TGSI_OPCODE_RSQ) { 4279 r600_bytecode_src_set_abs(&alu.src[j]); 4280 } 4281 } 4282 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4283 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4284 4285 if (i == last_slot - 1) 4286 alu.last = 1; 4287 r = r600_bytecode_add_alu(ctx->bc, &alu); 4288 if (r) 4289 return r; 4290 } 4291 return 0; 4292 } 4293 4294 static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) 4295 { 4296 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4297 int i, j, k, r; 4298 struct r600_bytecode_alu alu; 4299 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4300 int t1 = ctx->temp_reg; 4301 4302 for (k = 0; k <= lasti; k++) { 4303 if (!(inst->Dst[0].Register.WriteMask & (1 << k))) 4304 continue; 4305 4306 for (i = 0 ; i < 4; i++) { 4307 memset(&alu, 0, 
sizeof(struct r600_bytecode_alu)); 4308 alu.op = ctx->inst_info->op; 4309 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4310 r600_bytecode_src(&alu.src[j], &ctx->src[j], k); 4311 } 4312 alu.dst.sel = t1; 4313 alu.dst.chan = i; 4314 alu.dst.write = (i == k); 4315 if (i == 3) 4316 alu.last = 1; 4317 r = r600_bytecode_add_alu(ctx->bc, &alu); 4318 if (r) 4319 return r; 4320 } 4321 } 4322 4323 for (i = 0 ; i <= lasti; i++) { 4324 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4325 continue; 4326 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4327 alu.op = ALU_OP1_MOV; 4328 alu.src[0].sel = t1; 4329 alu.src[0].chan = i; 4330 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4331 alu.dst.write = 1; 4332 if (i == lasti) 4333 alu.last = 1; 4334 r = r600_bytecode_add_alu(ctx->bc, &alu); 4335 if (r) 4336 return r; 4337 } 4338 4339 return 0; 4340 } 4341 4342 4343 static int cayman_mul_double_instr(struct r600_shader_ctx *ctx) 4344 { 4345 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4346 int i, j, k, r; 4347 struct r600_bytecode_alu alu; 4348 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4349 int t1 = ctx->temp_reg; 4350 4351 /* t1 would get overwritten below if we actually tried to 4352 * multiply two pairs of doubles at a time. */ 4353 assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || 4354 inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); 4355 4356 k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1; 4357 4358 for (i = 0; i < 4; i++) { 4359 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4360 alu.op = ctx->inst_info->op; 4361 for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { 4362 r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 
0 : 1)); 4363 } 4364 alu.dst.sel = t1; 4365 alu.dst.chan = i; 4366 alu.dst.write = 1; 4367 if (i == 3) 4368 alu.last = 1; 4369 r = r600_bytecode_add_alu(ctx->bc, &alu); 4370 if (r) 4371 return r; 4372 } 4373 4374 for (i = 0; i <= lasti; i++) { 4375 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4376 continue; 4377 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4378 alu.op = ALU_OP1_MOV; 4379 alu.src[0].sel = t1; 4380 alu.src[0].chan = i; 4381 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4382 alu.dst.write = 1; 4383 if (i == lasti) 4384 alu.last = 1; 4385 r = r600_bytecode_add_alu(ctx->bc, &alu); 4386 if (r) 4387 return r; 4388 } 4389 4390 return 0; 4391 } 4392 4393 /* 4394 * Emit RECIP_64 + MUL_64 to implement division. 4395 */ 4396 static int cayman_ddiv_instr(struct r600_shader_ctx *ctx) 4397 { 4398 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4399 int r; 4400 struct r600_bytecode_alu alu; 4401 int t1 = ctx->temp_reg; 4402 int k; 4403 4404 /* Only support one double at a time. This is the same constraint as 4405 * in DMUL lowering. */ 4406 assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY || 4407 inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW); 4408 4409 k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1; 4410 4411 r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false); 4412 if (r) 4413 return r; 4414 4415 for (int i = 0; i < 4; i++) { 4416 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4417 alu.op = ALU_OP2_MUL_64; 4418 4419 r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1)); 4420 4421 alu.src[1].sel = t1; 4422 alu.src[1].chan = (i == 3) ? 
0 : 1; 4423 4424 alu.dst.sel = t1; 4425 alu.dst.chan = i; 4426 alu.dst.write = 1; 4427 if (i == 3) 4428 alu.last = 1; 4429 r = r600_bytecode_add_alu(ctx->bc, &alu); 4430 if (r) 4431 return r; 4432 } 4433 4434 for (int i = 0; i < 2; i++) { 4435 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4436 alu.op = ALU_OP1_MOV; 4437 alu.src[0].sel = t1; 4438 alu.src[0].chan = i; 4439 tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst); 4440 alu.dst.write = 1; 4441 if (i == 1) 4442 alu.last = 1; 4443 r = r600_bytecode_add_alu(ctx->bc, &alu); 4444 if (r) 4445 return r; 4446 } 4447 return 0; 4448 } 4449 4450 /* 4451 * r600 - trunc to -PI..PI range 4452 * r700 - normalize by dividing by 2PI 4453 * see fdo bug 27901 4454 */ 4455 static int tgsi_setup_trig(struct r600_shader_ctx *ctx) 4456 { 4457 int r; 4458 struct r600_bytecode_alu alu; 4459 4460 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4461 alu.op = ALU_OP3_MULADD; 4462 alu.is_op3 = 1; 4463 4464 alu.dst.chan = 0; 4465 alu.dst.sel = ctx->temp_reg; 4466 alu.dst.write = 1; 4467 4468 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4469 4470 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4471 alu.src[1].chan = 0; 4472 alu.src[1].value = u_bitcast_f2u(0.5f * M_1_PI); 4473 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 4474 alu.src[2].chan = 0; 4475 alu.last = 1; 4476 r = r600_bytecode_add_alu(ctx->bc, &alu); 4477 if (r) 4478 return r; 4479 4480 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4481 alu.op = ALU_OP1_FRACT; 4482 4483 alu.dst.chan = 0; 4484 alu.dst.sel = ctx->temp_reg; 4485 alu.dst.write = 1; 4486 4487 alu.src[0].sel = ctx->temp_reg; 4488 alu.src[0].chan = 0; 4489 alu.last = 1; 4490 r = r600_bytecode_add_alu(ctx->bc, &alu); 4491 if (r) 4492 return r; 4493 4494 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4495 alu.op = ALU_OP3_MULADD; 4496 alu.is_op3 = 1; 4497 4498 alu.dst.chan = 0; 4499 alu.dst.sel = ctx->temp_reg; 4500 alu.dst.write = 1; 4501 4502 alu.src[0].sel = ctx->temp_reg; 4503 alu.src[0].chan = 0; 4504 4505 
alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 4506 alu.src[1].chan = 0; 4507 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 4508 alu.src[2].chan = 0; 4509 4510 if (ctx->bc->chip_class == R600) { 4511 alu.src[1].value = u_bitcast_f2u(2.0f * M_PI); 4512 alu.src[2].value = u_bitcast_f2u(-M_PI); 4513 } else { 4514 alu.src[1].sel = V_SQ_ALU_SRC_1; 4515 alu.src[2].sel = V_SQ_ALU_SRC_0_5; 4516 alu.src[2].neg = 1; 4517 } 4518 4519 alu.last = 1; 4520 r = r600_bytecode_add_alu(ctx->bc, &alu); 4521 if (r) 4522 return r; 4523 return 0; 4524 } 4525 4526 static int cayman_trig(struct r600_shader_ctx *ctx) 4527 { 4528 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4529 struct r600_bytecode_alu alu; 4530 int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4531 int i, r; 4532 4533 r = tgsi_setup_trig(ctx); 4534 if (r) 4535 return r; 4536 4537 4538 for (i = 0; i < last_slot; i++) { 4539 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4540 alu.op = ctx->inst_info->op; 4541 alu.dst.chan = i; 4542 4543 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4544 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4545 4546 alu.src[0].sel = ctx->temp_reg; 4547 alu.src[0].chan = 0; 4548 if (i == last_slot - 1) 4549 alu.last = 1; 4550 r = r600_bytecode_add_alu(ctx->bc, &alu); 4551 if (r) 4552 return r; 4553 } 4554 return 0; 4555 } 4556 4557 static int tgsi_trig(struct r600_shader_ctx *ctx) 4558 { 4559 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4560 struct r600_bytecode_alu alu; 4561 int i, r; 4562 int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask); 4563 4564 r = tgsi_setup_trig(ctx); 4565 if (r) 4566 return r; 4567 4568 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4569 alu.op = ctx->inst_info->op; 4570 alu.dst.chan = 0; 4571 alu.dst.sel = ctx->temp_reg; 4572 alu.dst.write = 1; 4573 4574 alu.src[0].sel = ctx->temp_reg; 4575 alu.src[0].chan = 0; 4576 alu.last = 1; 4577 r = r600_bytecode_add_alu(ctx->bc, &alu); 
4578 if (r) 4579 return r; 4580 4581 /* replicate result */ 4582 for (i = 0; i < lasti + 1; i++) { 4583 if (!(inst->Dst[0].Register.WriteMask & (1 << i))) 4584 continue; 4585 4586 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4587 alu.op = ALU_OP1_MOV; 4588 4589 alu.src[0].sel = ctx->temp_reg; 4590 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4591 if (i == lasti) 4592 alu.last = 1; 4593 r = r600_bytecode_add_alu(ctx->bc, &alu); 4594 if (r) 4595 return r; 4596 } 4597 return 0; 4598 } 4599 4600 static int tgsi_scs(struct r600_shader_ctx *ctx) 4601 { 4602 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4603 struct r600_bytecode_alu alu; 4604 int i, r; 4605 4606 /* We'll only need the trig stuff if we are going to write to the 4607 * X or Y components of the destination vector. 4608 */ 4609 if (likely(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY)) { 4610 r = tgsi_setup_trig(ctx); 4611 if (r) 4612 return r; 4613 } 4614 4615 /* dst.x = COS */ 4616 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4617 if (ctx->bc->chip_class == CAYMAN) { 4618 for (i = 0 ; i < 3; i++) { 4619 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4620 alu.op = ALU_OP1_COS; 4621 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4622 4623 if (i == 0) 4624 alu.dst.write = 1; 4625 else 4626 alu.dst.write = 0; 4627 alu.src[0].sel = ctx->temp_reg; 4628 alu.src[0].chan = 0; 4629 if (i == 2) 4630 alu.last = 1; 4631 r = r600_bytecode_add_alu(ctx->bc, &alu); 4632 if (r) 4633 return r; 4634 } 4635 } else { 4636 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4637 alu.op = ALU_OP1_COS; 4638 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4639 4640 alu.src[0].sel = ctx->temp_reg; 4641 alu.src[0].chan = 0; 4642 alu.last = 1; 4643 r = r600_bytecode_add_alu(ctx->bc, &alu); 4644 if (r) 4645 return r; 4646 } 4647 } 4648 4649 /* dst.y = SIN */ 4650 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4651 if (ctx->bc->chip_class == CAYMAN) { 4652 for (i = 0 ; i < 3; i++) 
{ 4653 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4654 alu.op = ALU_OP1_SIN; 4655 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4656 if (i == 1) 4657 alu.dst.write = 1; 4658 else 4659 alu.dst.write = 0; 4660 alu.src[0].sel = ctx->temp_reg; 4661 alu.src[0].chan = 0; 4662 if (i == 2) 4663 alu.last = 1; 4664 r = r600_bytecode_add_alu(ctx->bc, &alu); 4665 if (r) 4666 return r; 4667 } 4668 } else { 4669 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4670 alu.op = ALU_OP1_SIN; 4671 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4672 4673 alu.src[0].sel = ctx->temp_reg; 4674 alu.src[0].chan = 0; 4675 alu.last = 1; 4676 r = r600_bytecode_add_alu(ctx->bc, &alu); 4677 if (r) 4678 return r; 4679 } 4680 } 4681 4682 /* dst.z = 0.0; */ 4683 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4684 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4685 4686 alu.op = ALU_OP1_MOV; 4687 4688 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4689 4690 alu.src[0].sel = V_SQ_ALU_SRC_0; 4691 alu.src[0].chan = 0; 4692 4693 alu.last = 1; 4694 4695 r = r600_bytecode_add_alu(ctx->bc, &alu); 4696 if (r) 4697 return r; 4698 } 4699 4700 /* dst.w = 1.0; */ 4701 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4702 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4703 4704 alu.op = ALU_OP1_MOV; 4705 4706 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4707 4708 alu.src[0].sel = V_SQ_ALU_SRC_1; 4709 alu.src[0].chan = 0; 4710 4711 alu.last = 1; 4712 4713 r = r600_bytecode_add_alu(ctx->bc, &alu); 4714 if (r) 4715 return r; 4716 } 4717 4718 return 0; 4719 } 4720 4721 static int tgsi_kill(struct r600_shader_ctx *ctx) 4722 { 4723 const struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4724 struct r600_bytecode_alu alu; 4725 int i, r; 4726 4727 for (i = 0; i < 4; i++) { 4728 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4729 alu.op = ctx->inst_info->op; 4730 4731 alu.dst.chan = i; 4732 4733 alu.src[0].sel = V_SQ_ALU_SRC_0; 4734 4735 if (inst->Instruction.Opcode 
== TGSI_OPCODE_KILL) { 4736 alu.src[1].sel = V_SQ_ALU_SRC_1; 4737 alu.src[1].neg = 1; 4738 } else { 4739 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 4740 } 4741 if (i == 3) { 4742 alu.last = 1; 4743 } 4744 r = r600_bytecode_add_alu(ctx->bc, &alu); 4745 if (r) 4746 return r; 4747 } 4748 4749 /* kill must be last in ALU */ 4750 ctx->bc->force_add_cf = 1; 4751 ctx->shader->uses_kill = TRUE; 4752 return 0; 4753 } 4754 4755 static int tgsi_lit(struct r600_shader_ctx *ctx) 4756 { 4757 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4758 struct r600_bytecode_alu alu; 4759 int r; 4760 4761 /* tmp.x = max(src.y, 0.0) */ 4762 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4763 alu.op = ALU_OP2_MAX; 4764 r600_bytecode_src(&alu.src[0], &ctx->src[0], 1); 4765 alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 4766 alu.src[1].chan = 1; 4767 4768 alu.dst.sel = ctx->temp_reg; 4769 alu.dst.chan = 0; 4770 alu.dst.write = 1; 4771 4772 alu.last = 1; 4773 r = r600_bytecode_add_alu(ctx->bc, &alu); 4774 if (r) 4775 return r; 4776 4777 if (inst->Dst[0].Register.WriteMask & (1 << 2)) 4778 { 4779 int chan; 4780 int sel; 4781 unsigned i; 4782 4783 if (ctx->bc->chip_class == CAYMAN) { 4784 for (i = 0; i < 3; i++) { 4785 /* tmp.z = log(tmp.x) */ 4786 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4787 alu.op = ALU_OP1_LOG_CLAMPED; 4788 alu.src[0].sel = ctx->temp_reg; 4789 alu.src[0].chan = 0; 4790 alu.dst.sel = ctx->temp_reg; 4791 alu.dst.chan = i; 4792 if (i == 2) { 4793 alu.dst.write = 1; 4794 alu.last = 1; 4795 } else 4796 alu.dst.write = 0; 4797 4798 r = r600_bytecode_add_alu(ctx->bc, &alu); 4799 if (r) 4800 return r; 4801 } 4802 } else { 4803 /* tmp.z = log(tmp.x) */ 4804 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4805 alu.op = ALU_OP1_LOG_CLAMPED; 4806 alu.src[0].sel = ctx->temp_reg; 4807 alu.src[0].chan = 0; 4808 alu.dst.sel = ctx->temp_reg; 4809 alu.dst.chan = 2; 4810 alu.dst.write = 1; 4811 alu.last = 1; 4812 r = r600_bytecode_add_alu(ctx->bc, 
&alu); 4813 if (r) 4814 return r; 4815 } 4816 4817 chan = alu.dst.chan; 4818 sel = alu.dst.sel; 4819 4820 /* tmp.x = amd MUL_LIT(tmp.z, src.w, src.x ) */ 4821 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4822 alu.op = ALU_OP3_MUL_LIT; 4823 alu.src[0].sel = sel; 4824 alu.src[0].chan = chan; 4825 r600_bytecode_src(&alu.src[1], &ctx->src[0], 3); 4826 r600_bytecode_src(&alu.src[2], &ctx->src[0], 0); 4827 alu.dst.sel = ctx->temp_reg; 4828 alu.dst.chan = 0; 4829 alu.dst.write = 1; 4830 alu.is_op3 = 1; 4831 alu.last = 1; 4832 r = r600_bytecode_add_alu(ctx->bc, &alu); 4833 if (r) 4834 return r; 4835 4836 if (ctx->bc->chip_class == CAYMAN) { 4837 for (i = 0; i < 3; i++) { 4838 /* dst.z = exp(tmp.x) */ 4839 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4840 alu.op = ALU_OP1_EXP_IEEE; 4841 alu.src[0].sel = ctx->temp_reg; 4842 alu.src[0].chan = 0; 4843 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4844 if (i == 2) { 4845 alu.dst.write = 1; 4846 alu.last = 1; 4847 } else 4848 alu.dst.write = 0; 4849 r = r600_bytecode_add_alu(ctx->bc, &alu); 4850 if (r) 4851 return r; 4852 } 4853 } else { 4854 /* dst.z = exp(tmp.x) */ 4855 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4856 alu.op = ALU_OP1_EXP_IEEE; 4857 alu.src[0].sel = ctx->temp_reg; 4858 alu.src[0].chan = 0; 4859 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 4860 alu.last = 1; 4861 r = r600_bytecode_add_alu(ctx->bc, &alu); 4862 if (r) 4863 return r; 4864 } 4865 } 4866 4867 /* dst.x, <- 1.0 */ 4868 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4869 alu.op = ALU_OP1_MOV; 4870 alu.src[0].sel = V_SQ_ALU_SRC_1; /*1.0*/ 4871 alu.src[0].chan = 0; 4872 tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst); 4873 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 0) & 1; 4874 r = r600_bytecode_add_alu(ctx->bc, &alu); 4875 if (r) 4876 return r; 4877 4878 /* dst.y = max(src.x, 0.0) */ 4879 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4880 alu.op = ALU_OP2_MAX; 4881 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4882 
alu.src[1].sel = V_SQ_ALU_SRC_0; /*0.0*/ 4883 alu.src[1].chan = 0; 4884 tgsi_dst(ctx, &inst->Dst[0], 1, &alu.dst); 4885 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 1) & 1; 4886 r = r600_bytecode_add_alu(ctx->bc, &alu); 4887 if (r) 4888 return r; 4889 4890 /* dst.w, <- 1.0 */ 4891 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4892 alu.op = ALU_OP1_MOV; 4893 alu.src[0].sel = V_SQ_ALU_SRC_1; 4894 alu.src[0].chan = 0; 4895 tgsi_dst(ctx, &inst->Dst[0], 3, &alu.dst); 4896 alu.dst.write = (inst->Dst[0].Register.WriteMask >> 3) & 1; 4897 alu.last = 1; 4898 r = r600_bytecode_add_alu(ctx->bc, &alu); 4899 if (r) 4900 return r; 4901 4902 return 0; 4903 } 4904 4905 static int tgsi_rsq(struct r600_shader_ctx *ctx) 4906 { 4907 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4908 struct r600_bytecode_alu alu; 4909 int i, r; 4910 4911 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4912 4913 /* XXX: 4914 * For state trackers other than OpenGL, we'll want to use 4915 * _RECIPSQRT_IEEE instead. 
4916 */ 4917 alu.op = ALU_OP1_RECIPSQRT_CLAMPED; 4918 4919 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4920 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4921 r600_bytecode_src_set_abs(&alu.src[i]); 4922 } 4923 alu.dst.sel = ctx->temp_reg; 4924 alu.dst.write = 1; 4925 alu.last = 1; 4926 r = r600_bytecode_add_alu(ctx->bc, &alu); 4927 if (r) 4928 return r; 4929 /* replicate result */ 4930 return tgsi_helper_tempx_replicate(ctx); 4931 } 4932 4933 static int tgsi_helper_tempx_replicate(struct r600_shader_ctx *ctx) 4934 { 4935 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4936 struct r600_bytecode_alu alu; 4937 int i, r; 4938 4939 for (i = 0; i < 4; i++) { 4940 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4941 alu.src[0].sel = ctx->temp_reg; 4942 alu.op = ALU_OP1_MOV; 4943 alu.dst.chan = i; 4944 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 4945 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 4946 if (i == 3) 4947 alu.last = 1; 4948 r = r600_bytecode_add_alu(ctx->bc, &alu); 4949 if (r) 4950 return r; 4951 } 4952 return 0; 4953 } 4954 4955 static int tgsi_trans_srcx_replicate(struct r600_shader_ctx *ctx) 4956 { 4957 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4958 struct r600_bytecode_alu alu; 4959 int i, r; 4960 4961 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4962 alu.op = ctx->inst_info->op; 4963 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 4964 r600_bytecode_src(&alu.src[i], &ctx->src[i], 0); 4965 } 4966 alu.dst.sel = ctx->temp_reg; 4967 alu.dst.write = 1; 4968 alu.last = 1; 4969 r = r600_bytecode_add_alu(ctx->bc, &alu); 4970 if (r) 4971 return r; 4972 /* replicate result */ 4973 return tgsi_helper_tempx_replicate(ctx); 4974 } 4975 4976 static int cayman_pow(struct r600_shader_ctx *ctx) 4977 { 4978 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 4979 int i, r; 4980 struct r600_bytecode_alu alu; 4981 int last_slot = 
(inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; 4982 4983 for (i = 0; i < 3; i++) { 4984 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4985 alu.op = ALU_OP1_LOG_IEEE; 4986 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 4987 alu.dst.sel = ctx->temp_reg; 4988 alu.dst.chan = i; 4989 alu.dst.write = 1; 4990 if (i == 2) 4991 alu.last = 1; 4992 r = r600_bytecode_add_alu(ctx->bc, &alu); 4993 if (r) 4994 return r; 4995 } 4996 4997 /* b * LOG2(a) */ 4998 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 4999 alu.op = ALU_OP2_MUL; 5000 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 5001 alu.src[1].sel = ctx->temp_reg; 5002 alu.dst.sel = ctx->temp_reg; 5003 alu.dst.write = 1; 5004 alu.last = 1; 5005 r = r600_bytecode_add_alu(ctx->bc, &alu); 5006 if (r) 5007 return r; 5008 5009 for (i = 0; i < last_slot; i++) { 5010 /* POW(a,b) = EXP2(b * LOG2(a))*/ 5011 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5012 alu.op = ALU_OP1_EXP_IEEE; 5013 alu.src[0].sel = ctx->temp_reg; 5014 5015 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5016 alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1; 5017 if (i == last_slot - 1) 5018 alu.last = 1; 5019 r = r600_bytecode_add_alu(ctx->bc, &alu); 5020 if (r) 5021 return r; 5022 } 5023 return 0; 5024 } 5025 5026 static int tgsi_pow(struct r600_shader_ctx *ctx) 5027 { 5028 struct r600_bytecode_alu alu; 5029 int r; 5030 5031 /* LOG2(a) */ 5032 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5033 alu.op = ALU_OP1_LOG_IEEE; 5034 r600_bytecode_src(&alu.src[0], &ctx->src[0], 0); 5035 alu.dst.sel = ctx->temp_reg; 5036 alu.dst.write = 1; 5037 alu.last = 1; 5038 r = r600_bytecode_add_alu(ctx->bc, &alu); 5039 if (r) 5040 return r; 5041 /* b * LOG2(a) */ 5042 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5043 alu.op = ALU_OP2_MUL; 5044 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 5045 alu.src[1].sel = ctx->temp_reg; 5046 alu.dst.sel = ctx->temp_reg; 5047 alu.dst.write = 1; 5048 alu.last = 1; 5049 r = r600_bytecode_add_alu(ctx->bc, 
			      &alu);
	if (r)
		return r;
	/* POW(a,b) = EXP2(b * LOG2(a))*/
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_EXP_IEEE;
	alu.src[0].sel = ctx->temp_reg;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return tgsi_helper_tempx_replicate(ctx);
}

/**
 * Emit integer division/modulo as an ALU sequence (the hardware has no
 * integer divide instruction).
 *
 * @param mod        0 = compute quotient (DIV), 1 = compute remainder (MOD)
 * @param signed_op  0 = unsigned operands, 1 = signed: run the unsigned
 *                   algorithm on |src0|,|src1| and patch the result sign
 *                   at the end.
 *
 * The refinement of RECIP_UINT is documented step by step in the comment
 * below.  NOTE(review): step 20 of that comment (src2==0 -> MAX_UINT) has
 * no corresponding code in this function — the divide-by-zero result is
 * whatever the RECIP path yields; confirm this is intended.
 */
static int tgsi_divmod(struct r600_shader_ctx *ctx, int mod, int signed_op)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, j;
	unsigned write_mask = inst->Dst[0].Register.WriteMask;
	int tmp0 = ctx->temp_reg;
	int tmp1 = r600_get_temp(ctx);
	int tmp2 = r600_get_temp(ctx);	/* signed path: |src0|, |src1|, sign(src0^src1) */
	int tmp3 = r600_get_temp(ctx);	/* CAYMAN path: float scratch */
	/* Unsigned path:
	 *
	 * we need to represent src1 as src2*q + r, where q - quotient, r - remainder
	 *
	 * 1. tmp0.x = rcp (src2) = 2^32/src2 + e, where e is rounding error
	 * 2. tmp0.z = lo (tmp0.x * src2)
	 * 3. tmp0.w = -tmp0.z
	 * 4. tmp0.y = hi (tmp0.x * src2)
	 * 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z)      = abs(lo(rcp*src2))
	 * 6. tmp0.w = hi (tmp0.z * tmp0.x)    = e, rounding error
	 * 7. tmp1.x = tmp0.x - tmp0.w
	 * 8. tmp1.y = tmp0.x + tmp0.w
	 * 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x)
	 * 10. tmp0.z = hi(tmp0.x * src1)     = q
	 * 11. tmp0.y = lo (tmp0.z * src2)     = src2*q = src1 - r
	 *
	 * 12. tmp0.w = src1 - tmp0.y       = r
	 * 13. tmp1.x = tmp0.w >= src2		= r >= src2 (uint comparison)
	 * 14. tmp1.y = src1 >= tmp0.y      = r >= 0 (uint comparison)
	 *
	 * if DIV
	 *
	 *   15. tmp1.z = tmp0.z + 1			= q + 1
	 *   16. tmp1.w = tmp0.z - 1			= q - 1
	 *
	 * else MOD
	 *
	 *   15. tmp1.z = tmp0.w - src2			= r - src2
	 *   16. tmp1.w = tmp0.w + src2			= r + src2
	 *
	 * endif
	 *
	 * 17. tmp1.x = tmp1.x & tmp1.y
	 *
	 * DIV: 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z
	 * MOD: 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z
	 *
	 * 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
	 * 20. dst = src2==0 ? MAX_UINT : tmp0.z
	 *
	 * Signed path:
	 *
	 * Same as unsigned, using abs values of the operands,
	 * and fixing the sign of the result in the end.
	 */

	for (i = 0; i < 4; i++) {
		if (!(write_mask & (1<<i)))
			continue;

		if (signed_op) {

			/* tmp2.x = -src0 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp2;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			alu.src[0].sel = V_SQ_ALU_SRC_0;

			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.y = -src1 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp2;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			alu.src[0].sel = V_SQ_ALU_SRC_0;

			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.z sign bit is set if src0 and src2 signs are different */
			/* it will be a sign of the quotient */
			if (!mod) {

				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_XOR_INT;

				alu.dst.sel = tmp2;
				alu.dst.chan = 2;
				alu.dst.write = 1;

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}

			/* tmp2.x = |src0| */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			alu.dst.sel = tmp2;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
			alu.src[2].sel = tmp2;
			alu.src[2].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp2.y = |src1| */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_CNDGE_INT;
			alu.is_op3 = 1;

			alu.dst.sel = tmp2;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			alu.src[2].sel = tmp2;
			alu.src[2].chan = 1;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		}

		/* 1. tmp0.x = rcp_u (src2) = 2^32/src2 + e, where e is rounding error */
		if (ctx->bc->chip_class == CAYMAN) {
			/* CAYMAN has no RECIP_UINT: emulate via float recip.
			 * tmp3.x = u2f(src2) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_UINT_TO_FLT;

			alu.dst.sel = tmp3;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* tmp0.x = recip(tmp3.x)
			 * (t-slot op becomes a vector op on CAYMAN: issue in
			 * slots x..z, keep only lane 0 — see notes at top of file) */
			for (j = 0 ; j < 3; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_RECIP_IEEE;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 0);

				alu.src[0].sel = tmp3;
				alu.src[0].chan = 0;

				if (j == 2)
					alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}

			/* tmp3.x = tmp0.x * 2^32 (0x4f800000 = 4294967296.0f) */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MUL;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
			alu.src[1].value = 0x4f800000;

			alu.dst.sel = tmp3;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;

			/* tmp0.x = f2u(tmp3.x) ~= 2^32 / src2 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_FLT_TO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			alu.src[0].sel = tmp3;
			alu.src[0].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_RECIP_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 0;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 2. tmp0.z = lo (tmp0.x * src2) */
		if (ctx->bc->chip_class == CAYMAN) {
			/* CAYMAN: MULLO is a vector op; issue in all 4 slots,
			 * keep only lane 2 */
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULLO_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 2);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;
				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
				}

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULLO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;
			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 3. tmp0.w = -tmp0.z */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp0;
		alu.dst.chan = 3;
		alu.dst.write = 1;

		alu.src[0].sel = V_SQ_ALU_SRC_0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 4. tmp0.y = hi (tmp0.x * src2) */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 1);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;

				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
				}
				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 5. tmp0.z = (tmp0.y == 0 ? tmp0.w : tmp0.z) = abs(lo(rcp*src)) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 2;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;
		alu.src[2].sel = tmp0;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 6. tmp0.w = hi (tmp0.z * tmp0.x) = e, rounding error */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 3);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 2;

				alu.src[1].sel = tmp0;
				alu.src[1].chan = 0;

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;

			alu.src[1].sel = tmp0;
			alu.src[1].chan = 0;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 7. tmp1.x = tmp0.x - tmp0.w */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 8. tmp1.y = tmp0.x + tmp0.w */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 1;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = 3;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 9. tmp0.x = (tmp0.y == 0 ? tmp1.y : tmp1.x) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 1;
		alu.src[2].sel = tmp1;
		alu.src[2].chan = 0;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 10. tmp0.z = hi(tmp0.x * src1) = q */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULHI_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 2);

				alu.src[0].sel = tmp0;
				alu.src[0].chan = 0;

				if (signed_op) {
					alu.src[1].sel = tmp2;
					alu.src[1].chan = 0;
				} else {
					r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
				}

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULHI_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 0;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 0;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 11. tmp0.y = lo (src2 * tmp0.z) = src2*q = src1 - r */
		if (ctx->bc->chip_class == CAYMAN) {
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_MULLO_UINT;

				alu.dst.sel = tmp0;
				alu.dst.chan = j;
				alu.dst.write = (j == 1);

				if (signed_op) {
					alu.src[0].sel = tmp2;
					alu.src[0].chan = 1;
				} else {
					r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
				}

				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = (j == 3);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_MULLO_UINT;

			alu.dst.sel = tmp0;
			alu.dst.chan = 1;
			alu.dst.write = 1;

			if (signed_op) {
				alu.src[0].sel = tmp2;
				alu.src[0].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			}

			alu.src[1].sel = tmp0;
			alu.src[1].chan = 2;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}

		/* 12. tmp0.w = src1 - tmp0.y = r */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SUB_INT;

		alu.dst.sel = tmp0;
		alu.dst.chan = 3;
		alu.dst.write = 1;

		if (signed_op) {
			alu.src[0].sel = tmp2;
			alu.src[0].chan = 0;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		alu.src[1].sel = tmp0;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 13. tmp1.x = tmp0.w >= src2 = r >= src2 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SETGE_UINT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp0;
		alu.src[0].chan = 3;
		if (signed_op) {
			alu.src[1].sel = tmp2;
			alu.src[1].chan = 1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
		}

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 14. tmp1.y = src1 >= tmp0.y = r >= 0 */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_SETGE_UINT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 1;
		alu.dst.write = 1;

		if (signed_op) {
			alu.src[0].sel = tmp2;
			alu.src[0].chan = 0;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		alu.src[1].sel = tmp0;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		if (mod) { /* UMOD */

			/* 15. tmp1.z = tmp0.w - src2 = r - src2 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_SUB_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 3;

			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* 16. tmp1.w = tmp0.w + src2 = r + src2 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 3;
			if (signed_op) {
				alu.src[1].sel = tmp2;
				alu.src[1].chan = 1;
			} else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
			}

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		} else { /* UDIV */

			/* 15. tmp1.z = tmp0.z + 1 = q + 1 DIV */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 2;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;
			alu.src[1].sel = V_SQ_ALU_SRC_1_INT;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

			/* 16. tmp1.w = tmp0.z - 1 = q - 1 */
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD_INT;

			alu.dst.sel = tmp1;
			alu.dst.chan = 3;
			alu.dst.write = 1;

			alu.src[0].sel = tmp0;
			alu.src[0].chan = 2;
			alu.src[1].sel = V_SQ_ALU_SRC_M_1_INT;

			alu.last = 1;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;

		}

		/* 17. tmp1.x = tmp1.x & tmp1.y */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_AND_INT;

		alu.dst.sel = tmp1;
		alu.dst.chan = 0;
		alu.dst.write = 1;

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 1;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 18. tmp0.z = tmp1.x==0 ? tmp0.z : tmp1.z DIV */
		/* 18. tmp0.z = tmp1.x==0 ? tmp0.w : tmp1.z MOD */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		alu.dst.sel = tmp0;
		alu.dst.chan = 2;
		alu.dst.write = 1;

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 0;
		alu.src[1].sel = tmp0;
		alu.src[1].chan = mod ? 3 : 2;
		alu.src[2].sel = tmp1;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		/* 19. tmp0.z = tmp1.y==0 ? tmp1.w : tmp0.z
		 * (for unsigned ops this is the final result and goes straight
		 * to the destination; signed ops still need the sign fixup) */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		alu.is_op3 = 1;

		if (signed_op) {
			alu.dst.sel = tmp0;
			alu.dst.chan = 2;
			alu.dst.write = 1;
		} else {
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		}

		alu.src[0].sel = tmp1;
		alu.src[0].chan = 1;
		alu.src[1].sel = tmp1;
		alu.src[1].chan = 3;
		alu.src[2].sel = tmp0;
		alu.src[2].chan = 2;

		alu.last = 1;
		if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
			return r;

		if (signed_op) {

			/* fix the sign of the result */

			if (mod) {

				/* tmp0.x = -tmp0.z */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* sign of the remainder is the same as the sign of src0 */
				/* tmp0.x = src0>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

			} else {

				/* tmp0.x = -tmp0.z */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP2_SUB_INT;

				alu.dst.sel = tmp0;
				alu.dst.chan = 0;
				alu.dst.write = 1;

				alu.src[0].sel = V_SQ_ALU_SRC_0;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;

				/* fix the quotient sign (same as the sign of src0*src1) */
				/* tmp0.x = tmp2.z>=0 ? tmp0.z : tmp0.x */
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP3_CNDGE_INT;
				alu.is_op3 = 1;

				tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

				alu.src[0].sel = tmp2;
				alu.src[0].chan = 2;
				alu.src[1].sel = tmp0;
				alu.src[1].chan = 2;
				alu.src[2].sel = tmp0;
				alu.src[2].chan = 0;

				alu.last = 1;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
	}
	return 0;
}

/* TGSI_OPCODE_UDIV: unsigned quotient */
static int tgsi_udiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 0);
}

/* TGSI_OPCODE_UMOD: unsigned remainder */
static int tgsi_umod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 0);
}

/* TGSI_OPCODE_IDIV: signed quotient */
static int tgsi_idiv(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 0, 1);
}

/* TGSI_OPCODE_IMOD: signed remainder */
static int tgsi_imod(struct r600_shader_ctx *ctx)
{
	return tgsi_divmod(ctx, 1, 1);
}


/* Float -> integer conversion: TRUNC to a temp, then the per-opcode
 * FLT_TO_* conversion (op taken from ctx->inst_info). */
static int tgsi_f2i(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst =
&ctx->parse.FullToken.FullInstruction; 5933 struct r600_bytecode_alu alu; 5934 int i, r; 5935 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5936 int last_inst = tgsi_last_instruction(write_mask); 5937 5938 for (i = 0; i < 4; i++) { 5939 if (!(write_mask & (1<<i))) 5940 continue; 5941 5942 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5943 alu.op = ALU_OP1_TRUNC; 5944 5945 alu.dst.sel = ctx->temp_reg; 5946 alu.dst.chan = i; 5947 alu.dst.write = 1; 5948 5949 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 5950 if (i == last_inst) 5951 alu.last = 1; 5952 r = r600_bytecode_add_alu(ctx->bc, &alu); 5953 if (r) 5954 return r; 5955 } 5956 5957 for (i = 0; i < 4; i++) { 5958 if (!(write_mask & (1<<i))) 5959 continue; 5960 5961 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5962 alu.op = ctx->inst_info->op; 5963 5964 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 5965 5966 alu.src[0].sel = ctx->temp_reg; 5967 alu.src[0].chan = i; 5968 5969 if (i == last_inst || alu.op == ALU_OP1_FLT_TO_UINT) 5970 alu.last = 1; 5971 r = r600_bytecode_add_alu(ctx->bc, &alu); 5972 if (r) 5973 return r; 5974 } 5975 5976 return 0; 5977 } 5978 5979 static int tgsi_iabs(struct r600_shader_ctx *ctx) 5980 { 5981 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 5982 struct r600_bytecode_alu alu; 5983 int i, r; 5984 unsigned write_mask = inst->Dst[0].Register.WriteMask; 5985 int last_inst = tgsi_last_instruction(write_mask); 5986 5987 /* tmp = -src */ 5988 for (i = 0; i < 4; i++) { 5989 if (!(write_mask & (1<<i))) 5990 continue; 5991 5992 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 5993 alu.op = ALU_OP2_SUB_INT; 5994 5995 alu.dst.sel = ctx->temp_reg; 5996 alu.dst.chan = i; 5997 alu.dst.write = 1; 5998 5999 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6000 alu.src[0].sel = V_SQ_ALU_SRC_0; 6001 6002 if (i == last_inst) 6003 alu.last = 1; 6004 r = r600_bytecode_add_alu(ctx->bc, &alu); 6005 if (r) 6006 return r; 6007 } 6008 6009 /* dst = (src >= 0 ? 
src : tmp) */ 6010 for (i = 0; i < 4; i++) { 6011 if (!(write_mask & (1<<i))) 6012 continue; 6013 6014 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6015 alu.op = ALU_OP3_CNDGE_INT; 6016 alu.is_op3 = 1; 6017 alu.dst.write = 1; 6018 6019 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6020 6021 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6022 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6023 alu.src[2].sel = ctx->temp_reg; 6024 alu.src[2].chan = i; 6025 6026 if (i == last_inst) 6027 alu.last = 1; 6028 r = r600_bytecode_add_alu(ctx->bc, &alu); 6029 if (r) 6030 return r; 6031 } 6032 return 0; 6033 } 6034 6035 static int tgsi_issg(struct r600_shader_ctx *ctx) 6036 { 6037 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6038 struct r600_bytecode_alu alu; 6039 int i, r; 6040 unsigned write_mask = inst->Dst[0].Register.WriteMask; 6041 int last_inst = tgsi_last_instruction(write_mask); 6042 6043 /* tmp = (src >= 0 ? src : -1) */ 6044 for (i = 0; i < 4; i++) { 6045 if (!(write_mask & (1<<i))) 6046 continue; 6047 6048 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6049 alu.op = ALU_OP3_CNDGE_INT; 6050 alu.is_op3 = 1; 6051 6052 alu.dst.sel = ctx->temp_reg; 6053 alu.dst.chan = i; 6054 alu.dst.write = 1; 6055 6056 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6057 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6058 alu.src[2].sel = V_SQ_ALU_SRC_M_1_INT; 6059 6060 if (i == last_inst) 6061 alu.last = 1; 6062 r = r600_bytecode_add_alu(ctx->bc, &alu); 6063 if (r) 6064 return r; 6065 } 6066 6067 /* dst = (tmp > 0 ? 
1 : tmp) */ 6068 for (i = 0; i < 4; i++) { 6069 if (!(write_mask & (1<<i))) 6070 continue; 6071 6072 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6073 alu.op = ALU_OP3_CNDGT_INT; 6074 alu.is_op3 = 1; 6075 alu.dst.write = 1; 6076 6077 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6078 6079 alu.src[0].sel = ctx->temp_reg; 6080 alu.src[0].chan = i; 6081 6082 alu.src[1].sel = V_SQ_ALU_SRC_1_INT; 6083 6084 alu.src[2].sel = ctx->temp_reg; 6085 alu.src[2].chan = i; 6086 6087 if (i == last_inst) 6088 alu.last = 1; 6089 r = r600_bytecode_add_alu(ctx->bc, &alu); 6090 if (r) 6091 return r; 6092 } 6093 return 0; 6094 } 6095 6096 6097 6098 static int tgsi_ssg(struct r600_shader_ctx *ctx) 6099 { 6100 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6101 struct r600_bytecode_alu alu; 6102 int i, r; 6103 6104 /* tmp = (src > 0 ? 1 : src) */ 6105 for (i = 0; i < 4; i++) { 6106 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6107 alu.op = ALU_OP3_CNDGT; 6108 alu.is_op3 = 1; 6109 6110 alu.dst.sel = ctx->temp_reg; 6111 alu.dst.chan = i; 6112 6113 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6114 alu.src[1].sel = V_SQ_ALU_SRC_1; 6115 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 6116 6117 if (i == 3) 6118 alu.last = 1; 6119 r = r600_bytecode_add_alu(ctx->bc, &alu); 6120 if (r) 6121 return r; 6122 } 6123 6124 /* dst = (-tmp > 0 ? 
-1 : tmp) */ 6125 for (i = 0; i < 4; i++) { 6126 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6127 alu.op = ALU_OP3_CNDGT; 6128 alu.is_op3 = 1; 6129 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6130 6131 alu.src[0].sel = ctx->temp_reg; 6132 alu.src[0].chan = i; 6133 alu.src[0].neg = 1; 6134 6135 alu.src[1].sel = V_SQ_ALU_SRC_1; 6136 alu.src[1].neg = 1; 6137 6138 alu.src[2].sel = ctx->temp_reg; 6139 alu.src[2].chan = i; 6140 6141 if (i == 3) 6142 alu.last = 1; 6143 r = r600_bytecode_add_alu(ctx->bc, &alu); 6144 if (r) 6145 return r; 6146 } 6147 return 0; 6148 } 6149 6150 static int tgsi_bfi(struct r600_shader_ctx *ctx) 6151 { 6152 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6153 struct r600_bytecode_alu alu; 6154 int i, r, t1, t2; 6155 6156 unsigned write_mask = inst->Dst[0].Register.WriteMask; 6157 int last_inst = tgsi_last_instruction(write_mask); 6158 6159 t1 = ctx->temp_reg; 6160 6161 for (i = 0; i < 4; i++) { 6162 if (!(write_mask & (1<<i))) 6163 continue; 6164 6165 /* create mask tmp */ 6166 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6167 alu.op = ALU_OP2_BFM_INT; 6168 alu.dst.sel = t1; 6169 alu.dst.chan = i; 6170 alu.dst.write = 1; 6171 alu.last = i == last_inst; 6172 6173 r600_bytecode_src(&alu.src[0], &ctx->src[3], i); 6174 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6175 6176 r = r600_bytecode_add_alu(ctx->bc, &alu); 6177 if (r) 6178 return r; 6179 } 6180 6181 t2 = r600_get_temp(ctx); 6182 6183 for (i = 0; i < 4; i++) { 6184 if (!(write_mask & (1<<i))) 6185 continue; 6186 6187 /* shift insert left */ 6188 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6189 alu.op = ALU_OP2_LSHL_INT; 6190 alu.dst.sel = t2; 6191 alu.dst.chan = i; 6192 alu.dst.write = 1; 6193 alu.last = i == last_inst; 6194 6195 r600_bytecode_src(&alu.src[0], &ctx->src[1], i); 6196 r600_bytecode_src(&alu.src[1], &ctx->src[2], i); 6197 6198 r = r600_bytecode_add_alu(ctx->bc, &alu); 6199 if (r) 6200 return r; 6201 } 6202 6203 for (i = 0; i < 
4; i++) { 6204 if (!(write_mask & (1<<i))) 6205 continue; 6206 6207 /* actual bitfield insert */ 6208 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6209 alu.op = ALU_OP3_BFI_INT; 6210 alu.is_op3 = 1; 6211 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6212 alu.dst.chan = i; 6213 alu.dst.write = 1; 6214 alu.last = i == last_inst; 6215 6216 alu.src[0].sel = t1; 6217 alu.src[0].chan = i; 6218 alu.src[1].sel = t2; 6219 alu.src[1].chan = i; 6220 r600_bytecode_src(&alu.src[2], &ctx->src[0], i); 6221 6222 r = r600_bytecode_add_alu(ctx->bc, &alu); 6223 if (r) 6224 return r; 6225 } 6226 6227 return 0; 6228 } 6229 6230 static int tgsi_msb(struct r600_shader_ctx *ctx) 6231 { 6232 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6233 struct r600_bytecode_alu alu; 6234 int i, r, t1, t2; 6235 6236 unsigned write_mask = inst->Dst[0].Register.WriteMask; 6237 int last_inst = tgsi_last_instruction(write_mask); 6238 6239 assert(ctx->inst_info->op == ALU_OP1_FFBH_INT || 6240 ctx->inst_info->op == ALU_OP1_FFBH_UINT); 6241 6242 t1 = ctx->temp_reg; 6243 6244 /* bit position is indexed from lsb by TGSI, and from msb by the hardware */ 6245 for (i = 0; i < 4; i++) { 6246 if (!(write_mask & (1<<i))) 6247 continue; 6248 6249 /* t1 = FFBH_INT / FFBH_UINT */ 6250 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6251 alu.op = ctx->inst_info->op; 6252 alu.dst.sel = t1; 6253 alu.dst.chan = i; 6254 alu.dst.write = 1; 6255 alu.last = i == last_inst; 6256 6257 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 6258 6259 r = r600_bytecode_add_alu(ctx->bc, &alu); 6260 if (r) 6261 return r; 6262 } 6263 6264 t2 = r600_get_temp(ctx); 6265 6266 for (i = 0; i < 4; i++) { 6267 if (!(write_mask & (1<<i))) 6268 continue; 6269 6270 /* t2 = 31 - t1 */ 6271 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6272 alu.op = ALU_OP2_SUB_INT; 6273 alu.dst.sel = t2; 6274 alu.dst.chan = i; 6275 alu.dst.write = 1; 6276 alu.last = i == last_inst; 6277 6278 alu.src[0].sel = 
V_SQ_ALU_SRC_LITERAL; 6279 alu.src[0].value = 31; 6280 alu.src[1].sel = t1; 6281 alu.src[1].chan = i; 6282 6283 r = r600_bytecode_add_alu(ctx->bc, &alu); 6284 if (r) 6285 return r; 6286 } 6287 6288 for (i = 0; i < 4; i++) { 6289 if (!(write_mask & (1<<i))) 6290 continue; 6291 6292 /* result = t1 >= 0 ? t2 : t1 */ 6293 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6294 alu.op = ALU_OP3_CNDGE_INT; 6295 alu.is_op3 = 1; 6296 tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); 6297 alu.dst.chan = i; 6298 alu.dst.write = 1; 6299 alu.last = i == last_inst; 6300 6301 alu.src[0].sel = t1; 6302 alu.src[0].chan = i; 6303 alu.src[1].sel = t2; 6304 alu.src[1].chan = i; 6305 alu.src[2].sel = t1; 6306 alu.src[2].chan = i; 6307 6308 r = r600_bytecode_add_alu(ctx->bc, &alu); 6309 if (r) 6310 return r; 6311 } 6312 6313 return 0; 6314 } 6315 6316 static int tgsi_interp_egcm(struct r600_shader_ctx *ctx) 6317 { 6318 struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction; 6319 struct r600_bytecode_alu alu; 6320 int r, i = 0, k, interp_gpr, interp_base_chan, tmp, lasti; 6321 unsigned location; 6322 int input; 6323 6324 assert(inst->Src[0].Register.File == TGSI_FILE_INPUT); 6325 6326 input = inst->Src[0].Register.Index; 6327 6328 /* Interpolators have been marked for use already by allocate_system_value_inputs */ 6329 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 6330 inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) { 6331 location = TGSI_INTERPOLATE_LOC_CENTER; /* sample offset will be added explicitly */ 6332 } 6333 else { 6334 location = TGSI_INTERPOLATE_LOC_CENTROID; 6335 } 6336 6337 k = eg_get_interpolator_index(ctx->shader->input[input].interpolate, location); 6338 if (k < 0) 6339 k = 0; 6340 interp_gpr = ctx->eg_interpolators[k].ij_index / 2; 6341 interp_base_chan = 2 * (ctx->eg_interpolators[k].ij_index % 2); 6342 6343 /* NOTE: currently offset is not perspective correct */ 6344 if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET || 6345 
	    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
		int sample_gpr = -1;
		int gradientsH, gradientsV;
		struct r600_bytecode_tex tex;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			sample_gpr = load_sample_position(ctx, &ctx->src[1], ctx->src[1].swizzle[0]);
		}

		/* Fetch per-pixel H and V gradients of the ij interpolants so the
		 * offset can be applied in barycentric space. */
		gradientsH = r600_get_temp(ctx);
		gradientsV = r600_get_temp(ctx);
		for (i = 0; i < 2; i++) {
			memset(&tex, 0, sizeof(struct r600_bytecode_tex));
			tex.op = i == 0 ? FETCH_OP_GET_GRADIENTS_H : FETCH_OP_GET_GRADIENTS_V;
			tex.src_gpr = interp_gpr;
			tex.src_sel_x = interp_base_chan + 0;
			tex.src_sel_y = interp_base_chan + 1;
			tex.src_sel_z = 0;
			tex.src_sel_w = 0;
			tex.dst_gpr = i == 0 ? gradientsH : gradientsV;
			tex.dst_sel_x = 0;
			tex.dst_sel_y = 1;
			tex.dst_sel_z = 7;
			tex.dst_sel_w = 7;
			tex.inst_mod = 1; // Use per pixel gradient calculation
			tex.sampler_id = 0;
			tex.resource_id = tex.sampler_id;
			r = r600_bytecode_add_tex(ctx->bc, &tex);
			if (r)
				return r;
		}

		/* temp.xy = ij + offset.x * dij/dx.  For INTERP_SAMPLE the x
		 * offset comes from sample_gpr.z (presumably the sample position
		 * loaded above — confirm against load_sample_position). */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsH;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 2;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 0);
			}
			alu.src[2].sel = interp_gpr;
			alu.src[2].chan = interp_base_chan + i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* temp.xy += offset.y * dij/dy (accumulates onto the previous
		 * MULADD result in temp). */
		for (i = 0; i < 2; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP3_MULADD;
			alu.is_op3 = 1;
			alu.src[0].sel = gradientsV;
			alu.src[0].chan = i;
			if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
				alu.src[1].sel = sample_gpr;
				alu.src[1].chan = 3;
			}
			else {
				r600_bytecode_src(&alu.src[1], &ctx->src[1], 1);
			}
			alu.src[2].sel = ctx->temp_reg;
			alu.src[2].chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			alu.last = i == 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* Interpolate all four channels: INTERP_ZW in slots 0-3 writes zw,
	 * INTERP_XY in slots 4-7 writes xy (only dst lanes 2..5 of the 8
	 * issues carry a result). */
	tmp = r600_get_temp(ctx);
	for (i = 0; i < 8; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = i < 4 ? ALU_OP2_INTERP_ZW : ALU_OP2_INTERP_XY;

		alu.dst.sel = tmp;
		if ((i > 1 && i < 6)) {
			alu.dst.write = 1;
		}
		else {
			alu.dst.write = 0;
		}
		alu.dst.chan = i % 4;

		if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
		    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE) {
			/* use the ij adjusted by the offset computed above */
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1 - (i % 2);
		} else {
			alu.src[0].sel = interp_gpr;
			alu.src[0].chan = interp_base_chan + 1 - (i % 2);
		}
		alu.src[1].sel = V_SQ_ALU_SRC_PARAM_BASE + ctx->shader->input[input].lds_pos;
		alu.src[1].chan = 0;

		alu.last = i % 4 == 3;
		alu.bank_swizzle_force = SQ_ALU_VEC_210;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	// INTERP can't swizzle dst
	lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	for (i = 0; i <= lasti; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = tmp;
		alu.src[0].chan = ctx->src[0].swizzle[i];
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.write = 1;
		alu.last = i == lasti;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}


/* Copy the per-channel result from ctx->temp_reg to the instruction's
 * destination, emitting NOPs for unwritten channels. */
static int tgsi_helper_copy(struct r600_shader_ctx *ctx, struct tgsi_full_instruction
						    *inst)
{
	struct r600_bytecode_alu alu;
	int i, r;

	/* Copy the result staged in ctx->temp_reg into the instruction's real
	 * destination.  Every channel gets an ALU slot: masked-out channels
	 * emit a NOP instead of a MOV (NOTE(review): presumably to keep the
	 * four-slot group shape intact -- confirm). */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		if (!(inst->Dst[0].Register.WriteMask & (1 << i))) {
			alu.op = ALU_OP0_NOP;
			alu.dst.chan = i;
		} else {
			alu.op = ALU_OP1_MOV;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = i;
		}
		if (i == 3) {
			/* close the ALU instruction group */
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Prepare one source operand of a 3-source ALU op.
 *
 * op3 operands cannot carry the abs modifier, so when the selected source
 * uses abs it is first copied (abs applied by the MOV) into channel 'chan'
 * of the pre-allocated scratch register 'temp', and *bc_src is rewritten
 * to read that copy instead. */
static int tgsi_make_src_for_op3(struct r600_shader_ctx *ctx,
                                 unsigned temp, int chan,
                                 struct r600_bytecode_alu_src *bc_src,
                                 const struct r600_shader_src *shader_src)
{
	struct r600_bytecode_alu alu;
	int r;

	r600_bytecode_src(bc_src, shader_src, chan);

	/* op3 operands don't support abs modifier */
	if (bc_src->abs) {
		assert(temp!=0); /* we actually need the extra register, make sure it is allocated. */
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = temp;
		alu.dst.chan = chan;
		alu.dst.write = 1;

		alu.src[0] = *bc_src;
		alu.last = true; // sufficient?
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		/* Redirect the operand to the freshly written abs-free copy. */
		memset(bc_src, 0, sizeof(*bc_src));
		bc_src->sel = temp;
		bc_src->chan = chan;
	}
	return 0;
}

/* Emit a generic 3-source ALU instruction, one slot per channel in the
 * destination write mask.  Sources using the abs modifier get a scratch
 * register reserved up front, because op3 operands cannot encode abs
 * (see tgsi_make_src_for_op3). */
static int tgsi_op3(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int temp_regs[4];

	/* Reserve a temp for every source whose abs must be stripped. */
	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
		temp_regs[j] = 0;
		if (ctx->src[j].abs)
			temp_regs[j] = r600_get_temp(ctx);
	}
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r = tgsi_make_src_for_op3(ctx, temp_regs[j], i, &alu.src[j], &ctx->src[j]);
			if (r)
				return r;
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit a dot-product style instruction.  The hardware op consumes all
 * four channels, so the switch below patches the unused channels:
 * DP2/DP3 feed zeros into the extra channels, DPH forces src0.w to 1.0. */
static int tgsi_dp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, r;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ctx->inst_info->op;
		for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
			r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
		}

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = (inst->Dst[0].Register.WriteMask >> i) & 1;
		/* handle some special cases */
		switch (inst->Instruction.Opcode) {
		case TGSI_OPCODE_DP2:
			if (i > 1) {
				/* DP2: z/w contribute 0 to the sum. */
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DP3:
			if (i > 2) {
				/* DP3: w contributes 0 to the sum. */
				alu.src[0].sel = alu.src[1].sel = V_SQ_ALU_SRC_0;
				alu.src[0].chan = alu.src[1].chan = 0;
			}
			break;
		case TGSI_OPCODE_DPH:
			if (i == 3) {
				/* DPH: force src0.w to +1.0 (homogeneous dot). */
				alu.src[0].sel = V_SQ_ALU_SRC_1;
				alu.src[0].chan = 0;
				alu.src[0].neg = 0;
			}
			break;
		default:
			break;
		}
		if (i == 3) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Return true when a texture source operand must first be copied into a
 * GPR: texture fetch instructions can only read plain GPRs and cannot
 * apply neg/abs modifiers; geometry-shader inputs always need the copy. */
static inline boolean tgsi_tex_src_requires_loading(struct r600_shader_ctx *ctx,
						    unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return (inst->Src[index].Register.File != TGSI_FILE_TEMPORARY &&
		inst->Src[index].Register.File != TGSI_FILE_INPUT &&
		inst->Src[index].Register.File != TGSI_FILE_OUTPUT) ||
		ctx->src[index].neg || ctx->src[index].abs ||
		(inst->Src[index].Register.File == TGSI_FILE_INPUT && ctx->type == PIPE_SHADER_GEOMETRY);
}

/* Map a TGSI source register to its flat GPR index: per-file base offset
 * plus the register index within that file. */
static inline unsigned tgsi_tex_get_src_gpr(struct r600_shader_ctx *ctx,
					    unsigned index)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	return ctx->file_offset[inst->Src[index].Register.File] + inst->Src[index].Register.Index;
}

/* Emit a TXF from a buffer resource as a vertex-cache fetch (VFETCH).
 * On pre-Evergreen chips the fetched values are then AND/OR-combined with
 * per-buffer constants from R600_BUFFER_INFO_CONST_BUFFER -- presumably
 * to patch up channels the buffer format does not provide (NOTE(review):
 * confirm against the code that fills that constant buffer). */
static int do_vtx_fetch_inst(struct r600_shader_ctx *ctx, boolean src_requires_loading)
{
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_alu alu;
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int src_gpr, r, i;
	/* source operand 1 selects the resource slot */
	int id = tgsi_tex_get_src_gpr(ctx, 1);

	src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
	if (src_requires_loading) {
		/* Copy the coordinate into a plain temp GPR first. */
		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		src_gpr = ctx->temp_reg;
	}

	memset(&vtx, 0, sizeof(vtx));
	vtx.op = FETCH_OP_VFETCH;
	vtx.buffer_id = id + R600_MAX_CONST_BUFFERS;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = src_gpr;
	vtx.mega_fetch_count = 16;
	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	/* masked-out channels select 7 (write disabled) */
	vtx.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; /* SEL_X */
	vtx.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; /* SEL_Y */
	vtx.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; /* SEL_Z */
	vtx.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; /* SEL_W */
	vtx.use_const_fields = 1;

	if ((r = r600_bytecode_add_vtx(ctx->bc, &vtx)))
		return r;

	/* Evergreen and later need no post-fetch fixup. */
	if (ctx->bc->chip_class >= EVERGREEN)
		return 0;

	/* Pre-Evergreen: AND each written channel with the first word of
	 * this buffer's info constants. */
	for (i = 0; i < 4; i++) {
		int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_AND_INT;

		alu.dst.chan = i;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = i;

		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL;
		alu.src[1].sel += (id * 2);
		alu.src[1].chan = i % 4;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* Pre-Evergreen: OR a buffer-info constant into channel W. */
	if (inst->Dst[0].Register.WriteMask & 3) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_OR_INT;

		alu.dst.chan = 3;
		alu.dst.sel = vtx.dst_gpr;
		alu.dst.write = 1;

		alu.src[0].sel = vtx.dst_gpr;
		alu.src[0].chan = 3;

		/* second word of this buffer's info-constant pair */
		alu.src[1].sel = R600_SHADER_BUFFER_INFO_SEL + (id * 2) + 1;
		alu.src[1].chan = 0;
		alu.src[1].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;

		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Implement TXQ for buffer textures: the query result is not available
 * from the texture unit, so it is MOVed into the destination from the
 * buffer-info constant buffer instead.  The source channel layout
 * differs between Evergreen+ and r600-class chips. */
static int r600_do_buffer_txq(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int id = tgsi_tex_get_src_gpr(ctx, 1);

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_MOV;
	alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL;
	if (ctx->bc->chip_class >= EVERGREEN) {
		/* channel 0 or 2 of each word */
		alu.src[0].sel += (id / 2);
		alu.src[0].chan = (id % 2) * 2;
	} else {
		/* r600 we have them at channel 2 of the second dword */
		/* NOTE(review): chan is set to 1 here, while the comment above
		 * says channel 2 -- confirm whether the comment counts from 1. */
		alu.src[0].sel += (id * 2) + 1;
		alu.src[0].chan = 1;
	}
	alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER;
	tgsi_dst(ctx, &inst->Dst[0], 0, &alu.dst);
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	return 0;
}

/* Translate TGSI texture instructions (TEX/TXB/TXL/TXD/TXF/TXP/TXQ/TG4/
 * LODQ/TXQS/...) into r600 texture-fetch bytecode. */
static int tgsi_tex(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_tex tex;
	struct r600_bytecode_alu alu;
	unsigned src_gpr;
	int r, i, j;
	int opcode;
	/* TXF from a compressed MSAA surface needs the FMASK indirection
	 * (the sample index is remapped through ldfptr further down). */
	bool read_compressed_msaa = ctx->bc->has_compressed_msaa_texturing &&
				    inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
				    (inst->Texture.Texture == TGSI_TEXTURE_2D_MSAA ||
				     inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY_MSAA);

	/* TXF cannot use the offset feature; offsets get added to the
	 * coordinates instead (see the NumOffsets handling below). */
	bool txf_add_offsets = inst->Texture.NumOffsets &&
			       inst->Instruction.Opcode == TGSI_OPCODE_TXF &&
			       inst->Texture.Texture != TGSI_TEXTURE_BUFFER;

	/*
Texture fetch instructions can only use gprs as source. 6787 * Also they cannot negate the source or take the absolute value */ 6788 const boolean src_requires_loading = (inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ && 6789 inst->Instruction.Opcode != TGSI_OPCODE_TXQS && 6790 tgsi_tex_src_requires_loading(ctx, 0)) || 6791 read_compressed_msaa || txf_add_offsets; 6792 6793 boolean src_loaded = FALSE; 6794 unsigned sampler_src_reg = inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ ? 0 : 1; 6795 int8_t offset_x = 0, offset_y = 0, offset_z = 0; 6796 boolean has_txq_cube_array_z = false; 6797 unsigned sampler_index_mode; 6798 6799 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ && 6800 ((inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6801 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY))) 6802 if (inst->Dst[0].Register.WriteMask & 4) { 6803 ctx->shader->has_txq_cube_array_z_comp = true; 6804 has_txq_cube_array_z = true; 6805 } 6806 6807 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || 6808 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 6809 inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || 6810 inst->Instruction.Opcode == TGSI_OPCODE_TG4) 6811 sampler_src_reg = 2; 6812 6813 /* TGSI moves the sampler to src reg 3 for TXD */ 6814 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) 6815 sampler_src_reg = 3; 6816 6817 sampler_index_mode = inst->Src[sampler_src_reg].Indirect.Index == 2 ? 
2 : 0; // CF_INDEX_1 : CF_INDEX_NONE 6818 6819 src_gpr = tgsi_tex_get_src_gpr(ctx, 0); 6820 6821 if (inst->Texture.Texture == TGSI_TEXTURE_BUFFER) { 6822 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ) { 6823 ctx->shader->uses_tex_buffers = true; 6824 return r600_do_buffer_txq(ctx); 6825 } 6826 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) { 6827 if (ctx->bc->chip_class < EVERGREEN) 6828 ctx->shader->uses_tex_buffers = true; 6829 return do_vtx_fetch_inst(ctx, src_requires_loading); 6830 } 6831 } 6832 6833 if (inst->Instruction.Opcode == TGSI_OPCODE_TXP) { 6834 int out_chan; 6835 /* Add perspective divide */ 6836 if (ctx->bc->chip_class == CAYMAN) { 6837 out_chan = 2; 6838 for (i = 0; i < 3; i++) { 6839 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6840 alu.op = ALU_OP1_RECIP_IEEE; 6841 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6842 6843 alu.dst.sel = ctx->temp_reg; 6844 alu.dst.chan = i; 6845 if (i == 2) 6846 alu.last = 1; 6847 if (out_chan == i) 6848 alu.dst.write = 1; 6849 r = r600_bytecode_add_alu(ctx->bc, &alu); 6850 if (r) 6851 return r; 6852 } 6853 6854 } else { 6855 out_chan = 3; 6856 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6857 alu.op = ALU_OP1_RECIP_IEEE; 6858 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 6859 6860 alu.dst.sel = ctx->temp_reg; 6861 alu.dst.chan = out_chan; 6862 alu.last = 1; 6863 alu.dst.write = 1; 6864 r = r600_bytecode_add_alu(ctx->bc, &alu); 6865 if (r) 6866 return r; 6867 } 6868 6869 for (i = 0; i < 3; i++) { 6870 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6871 alu.op = ALU_OP2_MUL; 6872 alu.src[0].sel = ctx->temp_reg; 6873 alu.src[0].chan = out_chan; 6874 r600_bytecode_src(&alu.src[1], &ctx->src[0], i); 6875 alu.dst.sel = ctx->temp_reg; 6876 alu.dst.chan = i; 6877 alu.dst.write = 1; 6878 r = r600_bytecode_add_alu(ctx->bc, &alu); 6879 if (r) 6880 return r; 6881 } 6882 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6883 alu.op = ALU_OP1_MOV; 6884 alu.src[0].sel = V_SQ_ALU_SRC_1; 6885 
alu.src[0].chan = 0; 6886 alu.dst.sel = ctx->temp_reg; 6887 alu.dst.chan = 3; 6888 alu.last = 1; 6889 alu.dst.write = 1; 6890 r = r600_bytecode_add_alu(ctx->bc, &alu); 6891 if (r) 6892 return r; 6893 src_loaded = TRUE; 6894 src_gpr = ctx->temp_reg; 6895 } 6896 6897 6898 if ((inst->Texture.Texture == TGSI_TEXTURE_CUBE || 6899 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 6900 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 6901 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) && 6902 inst->Instruction.Opcode != TGSI_OPCODE_TXQ && 6903 inst->Instruction.Opcode != TGSI_OPCODE_TXQ_LZ) { 6904 6905 static const unsigned src0_swizzle[] = {2, 2, 0, 1}; 6906 static const unsigned src1_swizzle[] = {1, 0, 2, 2}; 6907 6908 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */ 6909 for (i = 0; i < 4; i++) { 6910 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6911 alu.op = ALU_OP2_CUBE; 6912 r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]); 6913 r600_bytecode_src(&alu.src[1], &ctx->src[0], src1_swizzle[i]); 6914 alu.dst.sel = ctx->temp_reg; 6915 alu.dst.chan = i; 6916 if (i == 3) 6917 alu.last = 1; 6918 alu.dst.write = 1; 6919 r = r600_bytecode_add_alu(ctx->bc, &alu); 6920 if (r) 6921 return r; 6922 } 6923 6924 /* tmp1.z = RCP_e(|tmp1.z|) */ 6925 if (ctx->bc->chip_class == CAYMAN) { 6926 for (i = 0; i < 3; i++) { 6927 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6928 alu.op = ALU_OP1_RECIP_IEEE; 6929 alu.src[0].sel = ctx->temp_reg; 6930 alu.src[0].chan = 2; 6931 alu.src[0].abs = 1; 6932 alu.dst.sel = ctx->temp_reg; 6933 alu.dst.chan = i; 6934 if (i == 2) 6935 alu.dst.write = 1; 6936 if (i == 2) 6937 alu.last = 1; 6938 r = r600_bytecode_add_alu(ctx->bc, &alu); 6939 if (r) 6940 return r; 6941 } 6942 } else { 6943 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6944 alu.op = ALU_OP1_RECIP_IEEE; 6945 alu.src[0].sel = ctx->temp_reg; 6946 alu.src[0].chan = 2; 6947 alu.src[0].abs = 1; 6948 alu.dst.sel = ctx->temp_reg; 6949 alu.dst.chan = 2; 6950 
alu.dst.write = 1; 6951 alu.last = 1; 6952 r = r600_bytecode_add_alu(ctx->bc, &alu); 6953 if (r) 6954 return r; 6955 } 6956 6957 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x 6958 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x 6959 * muladd has no writemask, have to use another temp 6960 */ 6961 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6962 alu.op = ALU_OP3_MULADD; 6963 alu.is_op3 = 1; 6964 6965 alu.src[0].sel = ctx->temp_reg; 6966 alu.src[0].chan = 0; 6967 alu.src[1].sel = ctx->temp_reg; 6968 alu.src[1].chan = 2; 6969 6970 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6971 alu.src[2].chan = 0; 6972 alu.src[2].value = u_bitcast_f2u(1.5f); 6973 6974 alu.dst.sel = ctx->temp_reg; 6975 alu.dst.chan = 0; 6976 alu.dst.write = 1; 6977 6978 r = r600_bytecode_add_alu(ctx->bc, &alu); 6979 if (r) 6980 return r; 6981 6982 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 6983 alu.op = ALU_OP3_MULADD; 6984 alu.is_op3 = 1; 6985 6986 alu.src[0].sel = ctx->temp_reg; 6987 alu.src[0].chan = 1; 6988 alu.src[1].sel = ctx->temp_reg; 6989 alu.src[1].chan = 2; 6990 6991 alu.src[2].sel = V_SQ_ALU_SRC_LITERAL; 6992 alu.src[2].chan = 0; 6993 alu.src[2].value = u_bitcast_f2u(1.5f); 6994 6995 alu.dst.sel = ctx->temp_reg; 6996 alu.dst.chan = 1; 6997 alu.dst.write = 1; 6998 6999 alu.last = 1; 7000 r = r600_bytecode_add_alu(ctx->bc, &alu); 7001 if (r) 7002 return r; 7003 /* write initial compare value into Z component 7004 - W src 0 for shadow cube 7005 - X src 1 for shadow cube array */ 7006 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7007 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7008 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7009 alu.op = ALU_OP1_MOV; 7010 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) 7011 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 7012 else 7013 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7014 alu.dst.sel = ctx->temp_reg; 7015 alu.dst.chan = 2; 7016 alu.dst.write = 1; 7017 alu.last = 1; 7018 r = 
r600_bytecode_add_alu(ctx->bc, &alu); 7019 if (r) 7020 return r; 7021 } 7022 7023 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7024 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7025 if (ctx->bc->chip_class >= EVERGREEN) { 7026 int mytmp = r600_get_temp(ctx); 7027 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7028 alu.op = ALU_OP1_MOV; 7029 alu.src[0].sel = ctx->temp_reg; 7030 alu.src[0].chan = 3; 7031 alu.dst.sel = mytmp; 7032 alu.dst.chan = 0; 7033 alu.dst.write = 1; 7034 alu.last = 1; 7035 r = r600_bytecode_add_alu(ctx->bc, &alu); 7036 if (r) 7037 return r; 7038 7039 /* have to multiply original layer by 8 and add to face id (temp.w) in Z */ 7040 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7041 alu.op = ALU_OP3_MULADD; 7042 alu.is_op3 = 1; 7043 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7044 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7045 alu.src[1].chan = 0; 7046 alu.src[1].value = u_bitcast_f2u(8.0f); 7047 alu.src[2].sel = mytmp; 7048 alu.src[2].chan = 0; 7049 alu.dst.sel = ctx->temp_reg; 7050 alu.dst.chan = 3; 7051 alu.dst.write = 1; 7052 alu.last = 1; 7053 r = r600_bytecode_add_alu(ctx->bc, &alu); 7054 if (r) 7055 return r; 7056 } else if (ctx->bc->chip_class < EVERGREEN) { 7057 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7058 tex.op = FETCH_OP_SET_CUBEMAP_INDEX; 7059 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7060 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7061 tex.src_gpr = r600_get_temp(ctx); 7062 tex.src_sel_x = 0; 7063 tex.src_sel_y = 0; 7064 tex.src_sel_z = 0; 7065 tex.src_sel_w = 0; 7066 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 7067 tex.coord_type_x = 1; 7068 tex.coord_type_y = 1; 7069 tex.coord_type_z = 1; 7070 tex.coord_type_w = 1; 7071 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7072 alu.op = ALU_OP1_MOV; 7073 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7074 alu.dst.sel = tex.src_gpr; 7075 alu.dst.chan = 0; 7076 alu.last = 1; 7077 
alu.dst.write = 1; 7078 r = r600_bytecode_add_alu(ctx->bc, &alu); 7079 if (r) 7080 return r; 7081 7082 r = r600_bytecode_add_tex(ctx->bc, &tex); 7083 if (r) 7084 return r; 7085 } 7086 7087 } 7088 7089 /* for cube forms of lod and bias we need to route things */ 7090 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB || 7091 inst->Instruction.Opcode == TGSI_OPCODE_TXL || 7092 inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7093 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) { 7094 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7095 alu.op = ALU_OP1_MOV; 7096 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || 7097 inst->Instruction.Opcode == TGSI_OPCODE_TXL2) 7098 r600_bytecode_src(&alu.src[0], &ctx->src[1], 0); 7099 else 7100 r600_bytecode_src(&alu.src[0], &ctx->src[0], 3); 7101 alu.dst.sel = ctx->temp_reg; 7102 alu.dst.chan = 2; 7103 alu.last = 1; 7104 alu.dst.write = 1; 7105 r = r600_bytecode_add_alu(ctx->bc, &alu); 7106 if (r) 7107 return r; 7108 } 7109 7110 src_loaded = TRUE; 7111 src_gpr = ctx->temp_reg; 7112 } 7113 7114 if (inst->Instruction.Opcode == TGSI_OPCODE_TXD) { 7115 int temp_h = 0, temp_v = 0; 7116 int start_val = 0; 7117 7118 /* if we've already loaded the src (i.e. CUBE don't reload it). 
*/ 7119 if (src_loaded == TRUE) 7120 start_val = 1; 7121 else 7122 src_loaded = TRUE; 7123 for (i = start_val; i < 3; i++) { 7124 int treg = r600_get_temp(ctx); 7125 7126 if (i == 0) 7127 src_gpr = treg; 7128 else if (i == 1) 7129 temp_h = treg; 7130 else 7131 temp_v = treg; 7132 7133 for (j = 0; j < 4; j++) { 7134 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7135 alu.op = ALU_OP1_MOV; 7136 r600_bytecode_src(&alu.src[0], &ctx->src[i], j); 7137 alu.dst.sel = treg; 7138 alu.dst.chan = j; 7139 if (j == 3) 7140 alu.last = 1; 7141 alu.dst.write = 1; 7142 r = r600_bytecode_add_alu(ctx->bc, &alu); 7143 if (r) 7144 return r; 7145 } 7146 } 7147 for (i = 1; i < 3; i++) { 7148 /* set gradients h/v */ 7149 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7150 tex.op = (i == 1) ? FETCH_OP_SET_GRADIENTS_H : 7151 FETCH_OP_SET_GRADIENTS_V; 7152 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7153 tex.sampler_index_mode = sampler_index_mode; 7154 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7155 tex.resource_index_mode = sampler_index_mode; 7156 7157 tex.src_gpr = (i == 1) ? 
temp_h : temp_v; 7158 tex.src_sel_x = 0; 7159 tex.src_sel_y = 1; 7160 tex.src_sel_z = 2; 7161 tex.src_sel_w = 3; 7162 7163 tex.dst_gpr = r600_get_temp(ctx); /* just to avoid confusing the asm scheduler */ 7164 tex.dst_sel_x = tex.dst_sel_y = tex.dst_sel_z = tex.dst_sel_w = 7; 7165 if (inst->Texture.Texture != TGSI_TEXTURE_RECT) { 7166 tex.coord_type_x = 1; 7167 tex.coord_type_y = 1; 7168 tex.coord_type_z = 1; 7169 tex.coord_type_w = 1; 7170 } 7171 r = r600_bytecode_add_tex(ctx->bc, &tex); 7172 if (r) 7173 return r; 7174 } 7175 } 7176 7177 if (src_requires_loading && !src_loaded) { 7178 for (i = 0; i < 4; i++) { 7179 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7180 alu.op = ALU_OP1_MOV; 7181 r600_bytecode_src(&alu.src[0], &ctx->src[0], i); 7182 alu.dst.sel = ctx->temp_reg; 7183 alu.dst.chan = i; 7184 if (i == 3) 7185 alu.last = 1; 7186 alu.dst.write = 1; 7187 r = r600_bytecode_add_alu(ctx->bc, &alu); 7188 if (r) 7189 return r; 7190 } 7191 src_loaded = TRUE; 7192 src_gpr = ctx->temp_reg; 7193 } 7194 7195 /* get offset values */ 7196 if (inst->Texture.NumOffsets) { 7197 assert(inst->Texture.NumOffsets == 1); 7198 7199 /* The texture offset feature doesn't work with the TXF instruction 7200 * and must be emulated by adding the offset to the texture coordinates. 
*/ 7201 if (txf_add_offsets) { 7202 const struct tgsi_texture_offset *off = inst->TexOffsets; 7203 7204 switch (inst->Texture.Texture) { 7205 case TGSI_TEXTURE_3D: 7206 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7207 alu.op = ALU_OP2_ADD_INT; 7208 alu.src[0].sel = src_gpr; 7209 alu.src[0].chan = 2; 7210 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7211 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleZ]; 7212 alu.dst.sel = src_gpr; 7213 alu.dst.chan = 2; 7214 alu.dst.write = 1; 7215 alu.last = 1; 7216 r = r600_bytecode_add_alu(ctx->bc, &alu); 7217 if (r) 7218 return r; 7219 /* fall through */ 7220 7221 case TGSI_TEXTURE_2D: 7222 case TGSI_TEXTURE_SHADOW2D: 7223 case TGSI_TEXTURE_RECT: 7224 case TGSI_TEXTURE_SHADOWRECT: 7225 case TGSI_TEXTURE_2D_ARRAY: 7226 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7227 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7228 alu.op = ALU_OP2_ADD_INT; 7229 alu.src[0].sel = src_gpr; 7230 alu.src[0].chan = 1; 7231 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7232 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleY]; 7233 alu.dst.sel = src_gpr; 7234 alu.dst.chan = 1; 7235 alu.dst.write = 1; 7236 alu.last = 1; 7237 r = r600_bytecode_add_alu(ctx->bc, &alu); 7238 if (r) 7239 return r; 7240 /* fall through */ 7241 7242 case TGSI_TEXTURE_1D: 7243 case TGSI_TEXTURE_SHADOW1D: 7244 case TGSI_TEXTURE_1D_ARRAY: 7245 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7246 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7247 alu.op = ALU_OP2_ADD_INT; 7248 alu.src[0].sel = src_gpr; 7249 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7250 alu.src[1].value = ctx->literals[4 * off[0].Index + off[0].SwizzleX]; 7251 alu.dst.sel = src_gpr; 7252 alu.dst.write = 1; 7253 alu.last = 1; 7254 r = r600_bytecode_add_alu(ctx->bc, &alu); 7255 if (r) 7256 return r; 7257 break; 7258 /* texture offsets do not apply to other texture targets */ 7259 } 7260 } else { 7261 switch (inst->Texture.Texture) { 7262 case TGSI_TEXTURE_3D: 7263 offset_z = ctx->literals[4 * 
inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleZ] << 1; 7264 /* fallthrough */ 7265 case TGSI_TEXTURE_2D: 7266 case TGSI_TEXTURE_SHADOW2D: 7267 case TGSI_TEXTURE_RECT: 7268 case TGSI_TEXTURE_SHADOWRECT: 7269 case TGSI_TEXTURE_2D_ARRAY: 7270 case TGSI_TEXTURE_SHADOW2D_ARRAY: 7271 offset_y = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleY] << 1; 7272 /* fallthrough */ 7273 case TGSI_TEXTURE_1D: 7274 case TGSI_TEXTURE_SHADOW1D: 7275 case TGSI_TEXTURE_1D_ARRAY: 7276 case TGSI_TEXTURE_SHADOW1D_ARRAY: 7277 offset_x = ctx->literals[4 * inst->TexOffsets[0].Index + inst->TexOffsets[0].SwizzleX] << 1; 7278 } 7279 } 7280 } 7281 7282 /* Obtain the sample index for reading a compressed MSAA color texture. 7283 * To read the FMASK, we use the ldfptr instruction, which tells us 7284 * where the samples are stored. 7285 * For uncompressed 8x MSAA surfaces, ldfptr should return 0x76543210, 7286 * which is the identity mapping. Each nibble says which physical sample 7287 * should be fetched to get that sample. 7288 * 7289 * Assume src.z contains the sample index. It should be modified like this: 7290 * src.z = (ldfptr() >> (src.z * 4)) & 0xF; 7291 * Then fetch the texel with src. 
7292 */ 7293 if (read_compressed_msaa) { 7294 unsigned sample_chan = 3; 7295 unsigned temp = r600_get_temp(ctx); 7296 assert(src_loaded); 7297 7298 /* temp.w = ldfptr() */ 7299 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7300 tex.op = FETCH_OP_LD; 7301 tex.inst_mod = 1; /* to indicate this is ldfptr */ 7302 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7303 tex.sampler_index_mode = sampler_index_mode; 7304 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7305 tex.resource_index_mode = sampler_index_mode; 7306 tex.src_gpr = src_gpr; 7307 tex.dst_gpr = temp; 7308 tex.dst_sel_x = 7; /* mask out these components */ 7309 tex.dst_sel_y = 7; 7310 tex.dst_sel_z = 7; 7311 tex.dst_sel_w = 0; /* store X */ 7312 tex.src_sel_x = 0; 7313 tex.src_sel_y = 1; 7314 tex.src_sel_z = 2; 7315 tex.src_sel_w = 3; 7316 tex.offset_x = offset_x; 7317 tex.offset_y = offset_y; 7318 tex.offset_z = offset_z; 7319 r = r600_bytecode_add_tex(ctx->bc, &tex); 7320 if (r) 7321 return r; 7322 7323 /* temp.x = sample_index*4 */ 7324 if (ctx->bc->chip_class == CAYMAN) { 7325 for (i = 0 ; i < 4; i++) { 7326 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7327 alu.op = ALU_OP2_MULLO_INT; 7328 alu.src[0].sel = src_gpr; 7329 alu.src[0].chan = sample_chan; 7330 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7331 alu.src[1].value = 4; 7332 alu.dst.sel = temp; 7333 alu.dst.chan = i; 7334 alu.dst.write = i == 0; 7335 if (i == 3) 7336 alu.last = 1; 7337 r = r600_bytecode_add_alu(ctx->bc, &alu); 7338 if (r) 7339 return r; 7340 } 7341 } else { 7342 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7343 alu.op = ALU_OP2_MULLO_INT; 7344 alu.src[0].sel = src_gpr; 7345 alu.src[0].chan = sample_chan; 7346 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7347 alu.src[1].value = 4; 7348 alu.dst.sel = temp; 7349 alu.dst.chan = 0; 7350 alu.dst.write = 1; 7351 alu.last = 1; 7352 r = r600_bytecode_add_alu(ctx->bc, &alu); 7353 if (r) 7354 return r; 7355 } 7356 7357 /* sample_index = temp.w >> temp.x */ 7358 
memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7359 alu.op = ALU_OP2_LSHR_INT; 7360 alu.src[0].sel = temp; 7361 alu.src[0].chan = 3; 7362 alu.src[1].sel = temp; 7363 alu.src[1].chan = 0; 7364 alu.dst.sel = src_gpr; 7365 alu.dst.chan = sample_chan; 7366 alu.dst.write = 1; 7367 alu.last = 1; 7368 r = r600_bytecode_add_alu(ctx->bc, &alu); 7369 if (r) 7370 return r; 7371 7372 /* sample_index & 0xF */ 7373 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7374 alu.op = ALU_OP2_AND_INT; 7375 alu.src[0].sel = src_gpr; 7376 alu.src[0].chan = sample_chan; 7377 alu.src[1].sel = V_SQ_ALU_SRC_LITERAL; 7378 alu.src[1].value = 0xF; 7379 alu.dst.sel = src_gpr; 7380 alu.dst.chan = sample_chan; 7381 alu.dst.write = 1; 7382 alu.last = 1; 7383 r = r600_bytecode_add_alu(ctx->bc, &alu); 7384 if (r) 7385 return r; 7386 #if 0 7387 /* visualize the FMASK */ 7388 for (i = 0; i < 4; i++) { 7389 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7390 alu.op = ALU_OP1_INT_TO_FLT; 7391 alu.src[0].sel = src_gpr; 7392 alu.src[0].chan = sample_chan; 7393 alu.dst.sel = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7394 alu.dst.chan = i; 7395 alu.dst.write = 1; 7396 alu.last = 1; 7397 r = r600_bytecode_add_alu(ctx->bc, &alu); 7398 if (r) 7399 return r; 7400 } 7401 return 0; 7402 #endif 7403 } 7404 7405 /* does this shader want a num layers from TXQ for a cube array? 
*/ 7406 if (has_txq_cube_array_z) { 7407 int id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7408 7409 memset(&alu, 0, sizeof(struct r600_bytecode_alu)); 7410 alu.op = ALU_OP1_MOV; 7411 7412 alu.src[0].sel = R600_SHADER_BUFFER_INFO_SEL; 7413 if (ctx->bc->chip_class >= EVERGREEN) { 7414 /* channel 1 or 3 of each word */ 7415 alu.src[0].sel += (id / 2); 7416 alu.src[0].chan = ((id % 2) * 2) + 1; 7417 } else { 7418 /* r600 we have them at channel 2 of the second dword */ 7419 alu.src[0].sel += (id * 2) + 1; 7420 alu.src[0].chan = 2; 7421 } 7422 alu.src[0].kc_bank = R600_BUFFER_INFO_CONST_BUFFER; 7423 tgsi_dst(ctx, &inst->Dst[0], 2, &alu.dst); 7424 alu.last = 1; 7425 r = r600_bytecode_add_alu(ctx->bc, &alu); 7426 if (r) 7427 return r; 7428 /* disable writemask from texture instruction */ 7429 inst->Dst[0].Register.WriteMask &= ~4; 7430 } 7431 7432 opcode = ctx->inst_info->op; 7433 if (opcode == FETCH_OP_GATHER4 && 7434 inst->TexOffsets[0].File != TGSI_FILE_NULL && 7435 inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE) { 7436 opcode = FETCH_OP_GATHER4_O; 7437 7438 /* GATHER4_O/GATHER4_C_O use offset values loaded by 7439 SET_TEXTURE_OFFSETS instruction. The immediate offset values 7440 encoded in the instruction are ignored. 
*/ 7441 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7442 tex.op = FETCH_OP_SET_TEXTURE_OFFSETS; 7443 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7444 tex.sampler_index_mode = sampler_index_mode; 7445 tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; 7446 tex.resource_index_mode = sampler_index_mode; 7447 7448 tex.src_gpr = ctx->file_offset[inst->TexOffsets[0].File] + inst->TexOffsets[0].Index; 7449 tex.src_sel_x = inst->TexOffsets[0].SwizzleX; 7450 tex.src_sel_y = inst->TexOffsets[0].SwizzleY; 7451 tex.src_sel_z = inst->TexOffsets[0].SwizzleZ; 7452 tex.src_sel_w = 4; 7453 7454 tex.dst_sel_x = 7; 7455 tex.dst_sel_y = 7; 7456 tex.dst_sel_z = 7; 7457 tex.dst_sel_w = 7; 7458 7459 r = r600_bytecode_add_tex(ctx->bc, &tex); 7460 if (r) 7461 return r; 7462 } 7463 7464 if (inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D || 7465 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D || 7466 inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT || 7467 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7468 inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY || 7469 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY || 7470 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7471 switch (opcode) { 7472 case FETCH_OP_SAMPLE: 7473 opcode = FETCH_OP_SAMPLE_C; 7474 break; 7475 case FETCH_OP_SAMPLE_L: 7476 opcode = FETCH_OP_SAMPLE_C_L; 7477 break; 7478 case FETCH_OP_SAMPLE_LB: 7479 opcode = FETCH_OP_SAMPLE_C_LB; 7480 break; 7481 case FETCH_OP_SAMPLE_G: 7482 opcode = FETCH_OP_SAMPLE_C_G; 7483 break; 7484 /* Texture gather variants */ 7485 case FETCH_OP_GATHER4: 7486 opcode = FETCH_OP_GATHER4_C; 7487 break; 7488 case FETCH_OP_GATHER4_O: 7489 opcode = FETCH_OP_GATHER4_C_O; 7490 break; 7491 } 7492 } 7493 7494 memset(&tex, 0, sizeof(struct r600_bytecode_tex)); 7495 tex.op = opcode; 7496 7497 tex.sampler_id = tgsi_tex_get_src_gpr(ctx, sampler_src_reg); 7498 tex.sampler_index_mode = sampler_index_mode; 7499 tex.resource_id = tex.sampler_id + 
R600_MAX_CONST_BUFFERS; 7500 tex.resource_index_mode = sampler_index_mode; 7501 tex.src_gpr = src_gpr; 7502 tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index; 7503 7504 if (inst->Instruction.Opcode == TGSI_OPCODE_DDX_FINE || 7505 inst->Instruction.Opcode == TGSI_OPCODE_DDY_FINE) { 7506 tex.inst_mod = 1; /* per pixel gradient calculation instead of per 2x2 quad */ 7507 } 7508 7509 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) { 7510 int8_t texture_component_select = ctx->literals[4 * inst->Src[1].Register.Index + inst->Src[1].Register.SwizzleX]; 7511 tex.inst_mod = texture_component_select; 7512 7513 if (ctx->bc->chip_class == CAYMAN) { 7514 /* GATHER4 result order is different from TGSI TG4 */ 7515 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 0 : 7; 7516 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 1 : 7; 7517 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 2 : 7; 7518 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7519 } else { 7520 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7521 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 4) ? 2 : 7; 7522 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7523 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7524 } 7525 } 7526 else if (inst->Instruction.Opcode == TGSI_OPCODE_LODQ) { 7527 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7528 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7529 tex.dst_sel_z = 7; 7530 tex.dst_sel_w = 7; 7531 } 7532 else if (inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7533 tex.dst_sel_x = 3; 7534 tex.dst_sel_y = 7; 7535 tex.dst_sel_z = 7; 7536 tex.dst_sel_w = 7; 7537 } 7538 else { 7539 tex.dst_sel_x = (inst->Dst[0].Register.WriteMask & 1) ? 0 : 7; 7540 tex.dst_sel_y = (inst->Dst[0].Register.WriteMask & 2) ? 1 : 7; 7541 tex.dst_sel_z = (inst->Dst[0].Register.WriteMask & 4) ? 
2 : 7; 7542 tex.dst_sel_w = (inst->Dst[0].Register.WriteMask & 8) ? 3 : 7; 7543 } 7544 7545 7546 if (inst->Instruction.Opcode == TGSI_OPCODE_TXQ_LZ || 7547 inst->Instruction.Opcode == TGSI_OPCODE_TXQS) { 7548 tex.src_sel_x = 4; 7549 tex.src_sel_y = 4; 7550 tex.src_sel_z = 4; 7551 tex.src_sel_w = 4; 7552 } else if (src_loaded) { 7553 tex.src_sel_x = 0; 7554 tex.src_sel_y = 1; 7555 tex.src_sel_z = 2; 7556 tex.src_sel_w = 3; 7557 } else { 7558 tex.src_sel_x = ctx->src[0].swizzle[0]; 7559 tex.src_sel_y = ctx->src[0].swizzle[1]; 7560 tex.src_sel_z = ctx->src[0].swizzle[2]; 7561 tex.src_sel_w = ctx->src[0].swizzle[3]; 7562 tex.src_rel = ctx->src[0].rel; 7563 } 7564 7565 if (inst->Texture.Texture == TGSI_TEXTURE_CUBE || 7566 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || 7567 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY || 7568 inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 7569 tex.src_sel_x = 1; 7570 tex.src_sel_y = 0; 7571 tex.src_sel_z = 3; 7572 tex.src_sel_w = 2; /* route Z compare or Lod value into W */ 7573 } 7574 7575 if (inst->Texture.Texture != TGSI_TEXTURE_RECT && 7576 inst->Texture.Texture != TGSI_TEXTURE_SHADOWRECT) { 7577 tex.coord_type_x = 1; 7578 tex.coord_type_y = 1; 7579 } 7580 tex.coord_type_z = 1; 7581 tex.coord_type_w = 1; 7582 7583 tex.offset_x = offset_x; 7584 tex.offset_y = offset_y; 7585 if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && 7586 (inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY || 7587 inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)) { 7588 tex.offset_z = 0; 7589 } 7590 else { 7591 tex.offset_z = offset_z; 7592 } 7593 7594 /* Put the depth for comparison in W. 7595 * TGSI_TEXTURE_SHADOW2D_ARRAY already has the depth in W. 7596 * Some instructions expect the depth in Z. 
/* Emit TGSI LRP (linear interpolate): per write-masked channel,
 * dst = src0 * src1 + (1 - src0) * src2.
 * When src0 is the inline constant 0.5 the blend is an equal mix, emitted as
 * a single ADD with the divide-by-two output modifier instead of three ops. */
static int tgsi_lrp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	unsigned i, temp_regs[2];
	int r;

	/* optimize if it's just an equal balance */
	if (ctx->src[0].sel == V_SQ_ALU_SRC_0_5) {
		for (i = 0; i < lasti + 1; i++) {
			if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
				continue;

			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP2_ADD;
			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
			/* omod 3: halve the sum, giving (src1 + src2) * 0.5 */
			alu.omod = 3;
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
			alu.dst.chan = i;
			if (i == lasti) {
				alu.last = 1;
			}
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		return 0;
	}

	/* 1 - src0 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_ADD;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;
		r600_bytecode_src(&alu.src[1], &ctx->src[0], i);
		/* computed as 1 + (-src0) */
		r600_bytecode_src_toggle_neg(&alu.src[1]);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* (1 - src0) * src2 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_MUL;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* src0 * src1 + (1 - src0) * src2 */
	/* Sources carrying an abs modifier are staged through a fresh temp by
	 * tgsi_make_src_for_op3() below, since temp_regs[n] == 0 means
	 * "no staging needed". */
	if (ctx->src[0].abs)
		temp_regs[0] = r600_get_temp(ctx);
	else
		temp_regs[0] = 0;
	if (ctx->src[1].abs)
		temp_regs[1] = r600_get_temp(ctx);
	else
		temp_regs[1] = 0;

	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_MULADD;
		alu.is_op3 = 1;
		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
		if (r)
			return r;
		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[1]);
		if (r)
			return r;
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = i;

		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
/* Emit TGSI CMP: per channel, dst = (src0 < 0.0) ? src1 : src2.
 * Implemented as CNDGE(src0, src2, src1): CNDGE picks its second operand
 * when the first is >= 0, so src1/src2 are swapped below.
 * Special case: when src0 carries both abs and neg, the value is -|src0|,
 * which is >= 0 only when src0 == 0; CNDE on the unmodified source yields
 * the same selection without needing the modifiers. */
static int tgsi_cmp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r, j;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	int temp_regs[3];
	unsigned op;

	if (ctx->src[0].abs && ctx->src[0].neg) {
		op = ALU_OP3_CNDE;
		ctx->src[0].abs = 0;
		ctx->src[0].neg = 0;
	} else {
		op = ALU_OP3_CNDGE;
	}

	/* op3 sources with abs are staged through temps by
	 * tgsi_make_src_for_op3(); 0 means no staging required. */
	for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
		temp_regs[j] = 0;
		if (ctx->src[j].abs)
			temp_regs[j] = r600_get_temp(ctx);
	}

	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = op;
		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
		if (r)
			return r;
		/* note the swap: TGSI src2 goes into ALU operand 1 and vice versa */
		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
		if (r)
			return r;
		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
		if (r)
			return r;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}
/* Emit TGSI UCMP: per channel, dst = (src0 != 0) ? src1 : src2.
 * CNDE_INT selects its second operand when the condition is integer zero,
 * hence src2 is placed in ALU operand 1 and src1 in operand 2. */
static int tgsi_ucmp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_CNDE_INT;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		r600_bytecode_src(&alu.src[2], &ctx->src[1], i);
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == lasti)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit TGSI XPD (3-component cross product):
 *   dst.x = src0.y*src1.z - src0.z*src1.y   (and cyclic for y, z)
 * Pass 1 computes src0.zxy * src1.yzx into the temp; pass 2 computes
 * src0.yzx * src1.zxy - temp with MULADD (temp negated).  Channel w is
 * computed as 0*0 - 0 in both passes so it lands at 0 and can carry the
 * clause's `last` bit.  If the writemask is partial, results go through the
 * temp and tgsi_helper_copy() applies the mask. */
static int tgsi_xpd(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	static const unsigned int src0_swizzle[] = {2, 0, 1};
	static const unsigned int src1_swizzle[] = {1, 2, 0};
	struct r600_bytecode_alu alu;
	uint32_t use_temp = 0;
	int i, r;

	if (inst->Dst[0].Register.WriteMask != 0xf)
		use_temp = 1;

	/* pass 1: temp = src0.zxy * src1.yzx */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP2_MUL;
		if (i < 3) {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], src0_swizzle[i]);
			r600_bytecode_src(&alu.src[1], &ctx->src[1], src1_swizzle[i]);
		} else {
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = i;
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = i;
		}

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = i;
		alu.dst.write = 1;

		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* pass 2: dst = src0.yzx * src1.zxy - temp */
	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		alu.op = ALU_OP3_MULADD;

		if (i < 3) {
			/* note the swizzle tables are used crosswise here */
			r600_bytecode_src(&alu.src[0], &ctx->src[0], src1_swizzle[i]);
			r600_bytecode_src(&alu.src[1], &ctx->src[1], src0_swizzle[i]);
		} else {
			alu.src[0].sel = V_SQ_ALU_SRC_0;
			alu.src[0].chan = i;
			alu.src[1].sel = V_SQ_ALU_SRC_0;
			alu.src[1].chan = i;
		}

		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].neg = 1;
		alu.src[2].chan = i;

		if (use_temp)
			alu.dst.sel = ctx->temp_reg;
		else
			tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.dst.chan = i;
		alu.dst.write = 1;
		alu.is_op3 = 1;
		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	if (use_temp)
		return tgsi_helper_copy(ctx, inst);
	return 0;
}
/* Emit TGSI EXP (approximate exponential base 2):
 *   result.x = 2^floor(src.x)
 *   result.y = src.x - floor(src.x)   (fractional part)
 *   result.z = 2^src.x               (rough approximation)
 *   result.w = 1.0
 * All partial results are built in ctx->temp_reg and copied to the masked
 * destination by tgsi_helper_copy() at the end.  On CAYMAN, EXP_IEEE is
 * replicated across three vector slots (t-slot-only op on that chip — see
 * the CAYMAN notes at the top of this file). */
static int tgsi_exp(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	unsigned i;

	/* result.x = 2^floor(src); */
	if (inst->Dst[0].Register.WriteMask & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_FLOOR;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		if (ctx->bc->chip_class == CAYMAN) {
			/* alu is deliberately NOT re-memset here: the FLOOR
			 * setup above is reused with op/src/dst overridden */
			for (i = 0; i < 3; i++) {
				alu.op = ALU_OP1_EXP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 0;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				alu.dst.write = i == 0;
				alu.last = i == 2;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			alu.op = ALU_OP1_EXP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 0;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 0;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.y = tmp - floor(tmp); */
	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_FRACT;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

		alu.dst.sel = ctx->temp_reg;
#if 0
		r = tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		if (r)
			return r;
#endif
		alu.dst.write = 1;
		alu.dst.chan = 1;

		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* result.z = RoughApprox2ToX(tmp);*/
	if ((inst->Dst[0].Register.WriteMask >> 2) & 0x1) {
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_EXP_IEEE;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				/* only the z slot's result is kept */
				if (i == 2) {
					alu.dst.write = 1;
					alu.last = 1;
				}

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_EXP_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;
			alu.dst.chan = 2;

			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.w = 1.0;*/
	if ((inst->Dst[0].Register.WriteMask >> 3) & 0x1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.dst.write = 1;
		alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return tgsi_helper_copy(ctx, inst);
}
/* Emit TGSI LOG (approximate logarithm base 2):
 *   result.x = floor(log2(|src.x|))             (exponent)
 *   result.y = |src.x| / 2^floor(log2(|src.x|)) (mantissa, in [1,2))
 *   result.z = log2(|src.x|)
 *   result.w = 1.0
 * Partial results are accumulated in ctx->temp_reg channels and written to
 * the masked destination via tgsi_helper_copy().  On CAYMAN, LOG_IEEE /
 * EXP_IEEE / RECIP_IEEE are t-slot-only ops replicated across three vector
 * slots (see the CAYMAN notes at the top of this file); only one slot's
 * result is kept via dst.write. */
static int tgsi_log(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	unsigned i;

	/* result.x = floor(log2(|src|)); */
	if (inst->Dst[0].Register.WriteMask & 1) {
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP1_LOG_IEEE;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
				r600_bytecode_src_set_abs(&alu.src[0]);

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 0)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}

		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.op = ALU_OP1_LOG_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
			r600_bytecode_src_set_abs(&alu.src[0]);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 0;
			alu.dst.write = 1;
			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		/* alu deliberately not re-memset: previous setup is reused
		 * with op/src/dst overridden for the FLOOR */
		alu.op = ALU_OP1_FLOOR;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 0;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 0;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* result.y = |src.x| / (2 ^ floor(log2(|src.x|))); */
	if ((inst->Dst[0].Register.WriteMask >> 1) & 1) {

		/* chain in temp.y: log2 -> floor -> exp2 -> recip -> * |src| */
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP1_LOG_IEEE;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
				r600_bytecode_src_set_abs(&alu.src[0]);

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 1)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.op = ALU_OP1_LOG_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
			r600_bytecode_src_set_abs(&alu.src[0]);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 1;
			alu.dst.write = 1;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_FLOOR;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 1;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;

		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_EXP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 1;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 1)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_EXP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 1;
			alu.dst.write = 1;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));
				alu.op = ALU_OP1_RECIP_IEEE;
				alu.src[0].sel = ctx->temp_reg;
				alu.src[0].chan = 1;

				alu.dst.sel = ctx->temp_reg;
				alu.dst.chan = i;
				if (i == 1)
					alu.dst.write = 1;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_RECIP_IEEE;
			alu.src[0].sel = ctx->temp_reg;
			alu.src[0].chan = 1;

			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = 1;
			alu.dst.write = 1;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP2_MUL;

		r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
		r600_bytecode_src_set_abs(&alu.src[0]);

		alu.src[1].sel = ctx->temp_reg;
		alu.src[1].chan = 1;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 1;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* result.z = log2(|src|);*/
	if ((inst->Dst[0].Register.WriteMask >> 2) & 1) {
		if (ctx->bc->chip_class == CAYMAN) {
			for (i = 0; i < 3; i++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP1_LOG_IEEE;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
				r600_bytecode_src_set_abs(&alu.src[0]);

				alu.dst.sel = ctx->temp_reg;
				if (i == 2)
					alu.dst.write = 1;
				alu.dst.chan = i;
				if (i == 2)
					alu.last = 1;

				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.op = ALU_OP1_LOG_IEEE;
			r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
			r600_bytecode_src_set_abs(&alu.src[0]);

			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;
			alu.dst.chan = 2;
			alu.last = 1;

			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}

	/* result.w = 1.0; */
	if ((inst->Dst[0].Register.WriteMask >> 3) & 1) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP1_MOV;
		alu.src[0].sel = V_SQ_ALU_SRC_1;
		alu.src[0].chan = 0;

		alu.dst.sel = ctx->temp_reg;
		alu.dst.chan = 3;
		alu.dst.write = 1;
		alu.last = 1;

		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return tgsi_helper_copy(ctx, inst);
}
/* Evergreen+ path for TGSI ARL/ARR/UARL: load an address register.
 *   ARL  - float source, floor then convert (FLT_TO_INT_FLOOR)
 *   ARR  - float source, round to nearest (FLT_TO_INT)
 *   UARL - integer source, plain MOV
 * The value lands in the per-index address file register returned by
 * get_address_file_reg(); the matching *_loaded flag is cleared so the HW
 * AR / index register gets reloaded before its next use. */
static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
	unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);

	/* only ADDR[0..2] are supported */
	assert(inst->Dst[0].Register.Index < 3);
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		alu.op = ALU_OP1_FLT_TO_INT_FLOOR;
		break;
	case TGSI_OPCODE_ARR:
		alu.op = ALU_OP1_FLT_TO_INT;
		break;
	case TGSI_OPCODE_UARL:
		alu.op = ALU_OP1_MOV;
		break;
	default:
		assert(0);
		return -1;
	}

	for (i = 0; i <= lasti; ++i) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		alu.last = i == lasti;
		alu.dst.sel = reg;
		alu.dst.chan = i;
		alu.dst.write = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	/* force a reload of the corresponding HW register on next use */
	if (inst->Dst[0].Register.Index > 0)
		ctx->bc->index_loaded[inst->Dst[0].Register.Index - 1] = 0;
	else
		ctx->bc->ar_loaded = 0;

	return 0;
}
/* r600/r700 path for TGSI ARL/ARR/UARL: stage the address value in
 * ctx->bc->ar_reg and clear ar_loaded so AR is refreshed before use.
 * FLT_TO_INT is a trans-unit-only op on these chips, so each such ALU must
 * terminate its instruction group (alu.last = TRUE on every emission). */
static int tgsi_r600_arl(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r;
	int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	switch (inst->Instruction.Opcode) {
	case TGSI_OPCODE_ARL:
		/* ARL = floor(src) then convert; no FLT_TO_INT_FLOOR here */
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLOOR;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i))  {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}

		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.src[0].sel = ctx->bc->ar_reg;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		/* NOTE(review): unlike the FLOOR pass above, this converts every
		 * channel up to lasti regardless of the writemask — extra work on
		 * channels the FLOOR never wrote; verify this is intentional. */
		for (i = 0; i <= lasti; ++i) {
			alu.dst.chan = i;
			alu.src[0].chan = i;
			if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
				return r;
		}
		break;
	case TGSI_OPCODE_ARR:
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_FLT_TO_INT;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		/* FLT_TO_INT is trans-only on r600/r700 */
		alu.last = TRUE;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	case TGSI_OPCODE_UARL:
		memset(&alu, 0, sizeof(alu));
		alu.op = ALU_OP1_MOV;
		alu.dst.sel = ctx->bc->ar_reg;
		alu.dst.write = 1;
		for (i = 0; i <= lasti; ++i) {
			if (inst->Dst[0].Register.WriteMask & (1 << i)) {
				alu.dst.chan = i;
				r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
				alu.last = i == lasti;
				if ((r = r600_bytecode_add_alu(ctx->bc, &alu)))
					return r;
			}
		}
		break;
	default:
		assert(0);
		return -1;
	}

	ctx->bc->ar_loaded = 0;
	return 0;
}
/* Emit TGSI DST (distance attenuation vector):
 *   dst.x = 1, dst.y = src0.y*src1.y, dst.z = src0.z, dst.w = src1.w
 * Implemented as a single MUL per channel, substituting the inline 1.0
 * constant for whichever operand is unused in that channel. */
static int tgsi_opdst(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, r = 0;

	for (i = 0; i < 4; i++) {
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));

		alu.op = ALU_OP2_MUL;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		/* src0 only contributes to y and z */
		if (i == 0 || i == 3) {
			alu.src[0].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[0], &ctx->src[0], i);
		}

		/* src1 only contributes to y and w */
		if (i == 0 || i == 2) {
			alu.src[1].sel = V_SQ_ALU_SRC_1;
		} else {
			r600_bytecode_src(&alu.src[1], &ctx->src[1], i);
		}
		if (i == 3)
			alu.last = 1;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* Emit the predicate-setting ALU for a branch: compares src0.x against 0
 * with the given PRED_SET* opcode, updating the execute mask and predicate,
 * inside a CF clause of the requested alu_type (e.g. ALU_PUSH_BEFORE). */
static int emit_logic_pred(struct r600_shader_ctx *ctx, int opcode, int alu_type)
{
	struct r600_bytecode_alu alu;
	int r;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = opcode;
	alu.execute_mask = 1;
	alu.update_pred = 1;

	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	alu.dst.chan = 0;

	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	alu.src[1].sel = V_SQ_ALU_SRC_0;
	alu.src[1].chan = 0;

	alu.last = 1;

	r = r600_bytecode_add_alu_type(ctx->bc, &alu, alu_type);
	if (r)
		return r;
	return 0;
}

/* Pop `pops` levels off the HW branch stack.  If the previous CF instruction
 * is an ALU clause, the pop is folded into it by upgrading it to
 * ALU_POP_AFTER (1 pop) or ALU_POP2_AFTER (2 pops); otherwise — or if more
 * pops are needed — an explicit CF POP instruction is emitted with its
 * continue address pointing just past itself. */
static int pops(struct r600_shader_ctx *ctx, int pops)
{
	unsigned force_pop = ctx->bc->force_add_cf;

	if (!force_pop) {
		/* 3 == "cannot fold": only 1 or 2 total pops can be folded */
		int alu_pop = 3;
		if (ctx->bc->cf_last) {
			if (ctx->bc->cf_last->op == CF_OP_ALU)
				alu_pop = 0;
			else if (ctx->bc->cf_last->op == CF_OP_ALU_POP_AFTER)
				alu_pop = 1;
		}
		alu_pop += pops;
		if (alu_pop == 1) {
			ctx->bc->cf_last->op = CF_OP_ALU_POP_AFTER;
			ctx->bc->force_add_cf = 1;
		} else if (alu_pop == 2) {
			ctx->bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
			ctx->bc->force_add_cf = 1;
		} else {
			force_pop = 1;
		}
	}

	if (force_pop) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_POP);
		ctx->bc->cf_last->pop_count = pops;
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
	}

	return 0;
}
/* Recompute the worst-case HW call-stack depth after a push for `reason`
 * and record it in stack->max_entries (used to program STACK_SIZE).
 * Element counts follow the per-generation rules below; `entry_size` is the
 * per-chip number of stack elements per entry. */
static inline void callstack_update_max_depth(struct r600_shader_ctx *ctx,
		unsigned reason)
{
	struct r600_stack_info *stack = &ctx->bc->stack;
	unsigned elements, entries;

	unsigned entry_size = stack->entry_size;

	elements = (stack->loop + stack->push_wqm ) * entry_size;
	elements += stack->push;

	switch (ctx->bc->chip_class) {
	case R600:
	case R700:
		/* pre-r8xx: if any non-WQM PUSH instruction is invoked, 2 elements on
		 * the stack must be reserved to hold the current active/continue
		 * masks */
		if (reason == FC_PUSH_VPM) {
			elements += 2;
		}
		break;

	case CAYMAN:
		/* r9xx: any stack operation on empty stack consumes 2 additional
		 * elements */
		elements += 2;

		/* fallthrough */
		/* FIXME: do the two elements added above cover the cases for the
		 * r8xx+ below? */

	case EVERGREEN:
		/* r8xx+: 2 extra elements are not always required, but one extra
		 * element must be added for each of the following cases:
		 * 1. There is an ALU_ELSE_AFTER instruction at the point of greatest
		 *    stack usage.
		 *    (Currently we don't use ALU_ELSE_AFTER.)
		 * 2. There are LOOP/WQM frames on the stack when any flavor of non-WQM
		 *    PUSH instruction executed.
		 *
		 * NOTE: it seems we also need to reserve additional element in some
		 * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader,
		 * then STACK_SIZE should be 2 instead of 1 */
		if (reason == FC_PUSH_VPM) {
			elements += 1;
		}
		break;

	default:
		assert(0);
		break;
	}

	/* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4
	 * for all chips, so we use 4 in the final formula, not the real entry_size
	 * for the chip */
	entry_size = 4;

	entries = (elements + (entry_size - 1)) / entry_size;

	if (entries > stack->max_entries)
		stack->max_entries = entries;
}
8559 * 8560 * NOTE: it seems we also need to reserve additional element in some 8561 * other cases, e.g. when we have 4 levels of PUSH_VPM in the shader, 8562 * then STACK_SIZE should be 2 instead of 1 */ 8563 if (reason == FC_PUSH_VPM) { 8564 elements += 1; 8565 } 8566 break; 8567 8568 default: 8569 assert(0); 8570 break; 8571 } 8572 8573 /* NOTE: it seems STACK_SIZE is interpreted by hw as if entry_size is 4 8574 * for all chips, so we use 4 in the final formula, not the real entry_size 8575 * for the chip */ 8576 entry_size = 4; 8577 8578 entries = (elements + (entry_size - 1)) / entry_size; 8579 8580 if (entries > stack->max_entries) 8581 stack->max_entries = entries; 8582 } 8583 8584 static inline void callstack_pop(struct r600_shader_ctx *ctx, unsigned reason) 8585 { 8586 switch(reason) { 8587 case FC_PUSH_VPM: 8588 --ctx->bc->stack.push; 8589 assert(ctx->bc->stack.push >= 0); 8590 break; 8591 case FC_PUSH_WQM: 8592 --ctx->bc->stack.push_wqm; 8593 assert(ctx->bc->stack.push_wqm >= 0); 8594 break; 8595 case FC_LOOP: 8596 --ctx->bc->stack.loop; 8597 assert(ctx->bc->stack.loop >= 0); 8598 break; 8599 default: 8600 assert(0); 8601 break; 8602 } 8603 } 8604 8605 static inline void callstack_push(struct r600_shader_ctx *ctx, unsigned reason) 8606 { 8607 switch (reason) { 8608 case FC_PUSH_VPM: 8609 ++ctx->bc->stack.push; 8610 break; 8611 case FC_PUSH_WQM: 8612 ++ctx->bc->stack.push_wqm; 8613 case FC_LOOP: 8614 ++ctx->bc->stack.loop; 8615 break; 8616 default: 8617 assert(0); 8618 } 8619 8620 callstack_update_max_depth(ctx, reason); 8621 } 8622 8623 static void fc_set_mid(struct r600_shader_ctx *ctx, int fc_sp) 8624 { 8625 struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[fc_sp]; 8626 8627 sp->mid = realloc((void *)sp->mid, 8628 sizeof(struct r600_bytecode_cf *) * (sp->num_mid + 1)); 8629 sp->mid[sp->num_mid] = ctx->bc->cf_last; 8630 sp->num_mid++; 8631 } 8632 8633 static void fc_pushlevel(struct r600_shader_ctx *ctx, int type) 8634 { 8635 ctx->bc->fc_sp++; 8636 
/* Close and reset the top flow-control frame, releasing its mid-fixup
 * array. */
static void fc_poplevel(struct r600_shader_ctx *ctx)
{
	struct r600_cf_stack_entry *sp = &ctx->bc->fc_stack[ctx->bc->fc_sp];
	free(sp->mid);
	sp->mid = NULL;
	sp->num_mid = 0;
	sp->start = NULL;
	sp->type = 0;
	ctx->bc->fc_sp--;
}

/* Disabled scaffolding for subroutine return support (never compiled).
 * NOTE(review): if ever enabled, the stray ')' after the
 * r600_bytecode_add_cfinst() calls below would not compile. */
#if 0
static int emit_return(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_RETURN));
	return 0;
}

static int emit_jump_to_offset(struct r600_shader_ctx *ctx, int pops, int offset)
{

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP));
	ctx->bc->cf_last->pop_count = pops;
	/* XXX work out offset */
	return 0;
}

static int emit_setret_in_loop_flag(struct r600_shader_ctx *ctx, unsigned flag_value)
{
	return 0;
}

static void emit_testflag(struct r600_shader_ctx *ctx)
{

}

static void emit_return_on_flag(struct r600_shader_ctx *ctx, unsigned ifidx)
{
	emit_testflag(ctx);
	emit_jump_to_offset(ctx, 1, 4);
	emit_setret_in_loop_flag(ctx, V_SQ_ALU_SRC_0);
	pops(ctx, ifidx + 1);
	emit_return(ctx);
}

static void break_loop_on_flag(struct r600_shader_ctx *ctx, unsigned fc_sp)
{
	emit_testflag(ctx);

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, fc_sp);

	pops(ctx, 1);
}
#endif

/* Open an IF: emit the predicate compare (src0.x vs 0 using `opcode`),
 * push the execute mask, add the JUMP whose target is patched by
 * tgsi_else()/tgsi_endif(), and open an FC_IF frame. */
static int emit_if(struct r600_shader_ctx *ctx, int opcode)
{
	int alu_type = CF_OP_ALU_PUSH_BEFORE;

	/* There is a hardware bug on Cayman where a BREAK/CONTINUE followed by
	 * LOOP_STARTxxx for nested loops may put the branch stack into a state
	 * such that ALU_PUSH_BEFORE doesn't work as expected. Workaround this
	 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
		alu_type = CF_OP_ALU;
	}

	emit_logic_pred(ctx, opcode, alu_type);

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);

	fc_pushlevel(ctx, FC_IF);

	callstack_push(ctx, FC_PUSH_VPM);
	return 0;
}
 Workaround this
	 * by replacing the ALU_PUSH_BEFORE with a PUSH + ALU */
	if (ctx->bc->chip_class == CAYMAN && ctx->bc->stack.loop > 1) {
		r600_bytecode_add_cfinst(ctx->bc, CF_OP_PUSH);
		/* Target the CF instruction after this one (ids advance by 2,
		 * matching the "+ 2" fixups used elsewhere in this file). */
		ctx->bc->cf_last->cf_addr = ctx->bc->cf_last->id + 2;
		alu_type = CF_OP_ALU;
	}

	emit_logic_pred(ctx, opcode, alu_type);

	/* Placeholder JUMP; its target is filled in by tgsi_else()/tgsi_endif(). */
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_JUMP);

	fc_pushlevel(ctx, FC_IF);

	callstack_push(ctx, FC_PUSH_VPM);
	return 0;
}

/* TGSI IF: float comparison against zero. */
static int tgsi_if(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE);
}

/* TGSI UIF: integer comparison against zero. */
static int tgsi_uif(struct r600_shader_ctx *ctx)
{
	return emit_if(ctx, ALU_OP2_PRED_SETNE_INT);
}

/* TGSI ELSE: emit the ELSE CF, record it as a "mid" marker, and point the
 * opening JUMP of the IF at it. */
static int tgsi_else(struct r600_shader_ctx *ctx)
{
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_ELSE);
	ctx->bc->cf_last->pop_count = 1;

	fc_set_mid(ctx, ctx->bc->fc_sp);
	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id;
	return 0;
}

/* TGSI ENDIF: pop the branch stack and patch the pending jump target --
 * the opening JUMP when there was no ELSE, otherwise the ELSE marker. */
static int tgsi_endif(struct r600_shader_ctx *ctx)
{
	pops(ctx, 1);
	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_IF) {
		R600_ERR("if/endif unbalanced in shader\n");
		return -1;
	}

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].mid == NULL) {
		/* No ELSE: the IF's JUMP skips past the current CF. */
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;
		ctx->bc->fc_stack[ctx->bc->fc_sp].start->pop_count = 1;
	} else {
		/* With ELSE: the ELSE marker jumps past the current CF. */
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[0]->cf_addr = ctx->bc->cf_last->id + 2;
	}
	fc_poplevel(ctx);

	callstack_pop(ctx, FC_PUSH_VPM);
	return 0;
}

/* TGSI BGNLOOP: open a hardware loop. */
static int tgsi_bgnloop(struct r600_shader_ctx *ctx)
{
	/* LOOP_START_DX10 ignores the LOOP_CONFIG* registers, so it is not
	 * limited to 4096 iterations, like the other LOOP_* instructions.
	 */
	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_START_DX10);

	fc_pushlevel(ctx, FC_LOOP);

	/* check stack depth */
	callstack_push(ctx, FC_LOOP);
	return 0;
}

/* TGSI ENDLOOP: emit LOOP_END and back-patch the loop's jump addresses. */
static int tgsi_endloop(struct r600_shader_ctx *ctx)
{
	unsigned i;

	r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_END);

	if (ctx->bc->fc_stack[ctx->bc->fc_sp].type != FC_LOOP) {
		R600_ERR("loop/endloop in shader code are not paired.\n");
		return -EINVAL;
	}

	/* fixup loop pointers - from r600isa
	   LOOP END points to CF after LOOP START,
	   LOOP START point to CF after LOOP END
	   BRK/CONT point to LOOP END CF
	*/
	ctx->bc->cf_last->cf_addr = ctx->bc->fc_stack[ctx->bc->fc_sp].start->id + 2;

	ctx->bc->fc_stack[ctx->bc->fc_sp].start->cf_addr = ctx->bc->cf_last->id + 2;

	/* Retarget every recorded BREAK/CONTINUE at the LOOP_END just emitted. */
	for (i = 0; i < ctx->bc->fc_stack[ctx->bc->fc_sp].num_mid; i++) {
		ctx->bc->fc_stack[ctx->bc->fc_sp].mid[i]->cf_addr = ctx->bc->cf_last->id;
	}
	/* XXX add LOOPRET support */
	fc_poplevel(ctx);
	callstack_pop(ctx, FC_LOOP);
	return 0;
}

/* TGSI BREAKC: conditional loop break; the predicate is in src and the
 * break is attached to the innermost enclosing FC_LOOP entry. */
static int tgsi_loop_breakc(struct r600_shader_ctx *ctx)
{
	int r;
	unsigned int fscp;

	/* Find the innermost loop on the control-flow stack (index 0 unused). */
	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}
	if (fscp == 0) {
		R600_ERR("BREAKC not inside loop/endloop pair\n");
		return -EINVAL;
	}

	if (ctx->bc->chip_class == EVERGREEN &&
	    ctx->bc->family != CHIP_CYPRESS &&
	    ctx->bc->family != CHIP_JUNIPER) {
		/* HW bug: ALU_BREAK does not save the active mask correctly.
		 * Lower BREAKC to: UIF(cond) { LOOP_BREAK } ENDIF instead. */
		r = tgsi_uif(ctx);
		if (r)
			return r;

		r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_LOOP_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);

		return tgsi_endif(ctx);
	} else {
		r = emit_logic_pred(ctx, ALU_OP2_PRED_SETE_INT, CF_OP_ALU_BREAK);
		if (r)
			return r;
		fc_set_mid(ctx, fscp);
	}

	return 0;
}

/* TGSI BRK/CONT: emit LOOP_BREAK or LOOP_CONTINUE (chosen by
 * ctx->inst_info->op) and attach it to the innermost enclosing loop so
 * tgsi_endloop() can patch its target. */
static int tgsi_loop_brk_cont(struct r600_shader_ctx *ctx)
{
	unsigned int fscp;

	/* Find the innermost loop on the control-flow stack (index 0 unused). */
	for (fscp = ctx->bc->fc_sp; fscp > 0; fscp--)
	{
		if (FC_LOOP == ctx->bc->fc_stack[fscp].type)
			break;
	}

	if (fscp == 0) {
		R600_ERR("Break not inside loop/endloop pair\n");
		return -EINVAL;
	}

	r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);

	fc_set_mid(ctx, fscp);

	return 0;
}

/* TGSI EMIT/ENDPRIM for geometry shaders: flush pending ring writes for
 * EMIT_VERTEX, then emit the CUT/EMIT CF instruction for the stream taken
 * from the (literal) src0 operand. */
static int tgsi_gs_emit(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	int stream = ctx->literals[inst->Src[0].Register.Index * 4 + inst->Src[0].Register.SwizzleX];
	int r;

	if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
		emit_gs_ring_writes(ctx, ctx->gs_stream_output_info, stream, TRUE);

	r = r600_bytecode_add_cfinst(ctx->bc, ctx->inst_info->op);
	if (!r) {
		ctx->bc->cf_last->count = stream; // Count field for CUT/EMIT_VERTEX indicates which stream
		if (ctx->inst_info->op == CF_OP_EMIT_VERTEX)
			return emit_inc_ring_offset(ctx, stream, TRUE);
	}
	return r;
}

/* TGSI UMAD: dst = src0 * src1 + src2 (unsigned), lowered to MULLO_UINT
 * into temp_reg followed by ADD_INT into the destination. */
static int tgsi_umad(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int i, j, k, r;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	/* src0 * src1 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		if (ctx->bc->chip_class == CAYMAN) {
			/* Cayman: MULLO_UINT is a vector op issued across all
			 * four slots (see the CAYMAN notes at the top of the
			 * file). */
			for (j = 0 ; j < 4; j++) {
				memset(&alu, 0, sizeof(struct r600_bytecode_alu));

				alu.op = ALU_OP2_MULLO_UINT;
				for (k = 0; k < inst->Instruction.NumSrcRegs; k++) {
					r600_bytecode_src(&alu.src[k], &ctx->src[k], i);
				}
				alu.dst.chan = j;
				alu.dst.sel = ctx->temp_reg;
				/* Only the slot matching the destination channel
				 * actually writes its result. */
				alu.dst.write = (j == i);
				if (j == 3)
					alu.last = 1;
				r = r600_bytecode_add_alu(ctx->bc, &alu);
				if (r)
					return r;
			}
		} else {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));

			alu.dst.chan = i;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.write = 1;

			alu.op = ALU_OP2_MULLO_UINT;
			for (j = 0; j < 2; j++) {
				r600_bytecode_src(&alu.src[j], &ctx->src[j], i);
			}

			alu.last = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
	}


	/* + src2 */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);

		alu.op = ALU_OP2_ADD_INT;

		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i;

		r600_bytecode_src(&alu.src[1], &ctx->src[2], i);
		if (i == lasti) {
			alu.last = 1;
		}
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}
	return 0;
}

/* TGSI PK2H: pack two 32-bit floats into one 32-bit word holding two
 * half-floats (low = src.x, high = src.y). */
static int tgsi_pk2h(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r, i;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	/* temp.xy = f32_to_f16(src) */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_FLT32_TO_FLT16;
	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;
	alu.dst.chan = 1;
	r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.x = temp.y * 0x10000 + temp.x */
	for (i = 0; i < lasti + 1; i++) {
		if
 (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;

		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		/* Combine the two halves: high half shifted up via the
		 * multiply-add (temp.y * 0x10000 + temp.x). */
		alu.op = ALU_OP3_MULADD_UINT24;
		alu.is_op3 = 1;
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.last = i == lasti;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = 1;
		alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
		alu.src[1].value = 0x10000;
		alu.src[2].sel = ctx->temp_reg;
		alu.src[2].chan = 0;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* TGSI UP2H: unpack two half-floats stored in src.x into 32-bit floats;
 * even dest channels get the low half, odd channels the high half. */
static int tgsi_up2h(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_alu alu;
	int r, i;
	int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);

	/* temp.x = src.x */
	/* note: no need to mask out the high bits */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_MOV;
	alu.dst.chan = 0;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* temp.y = src.x >> 16 */
	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP2_LSHR_INT;
	alu.dst.chan = 1;
	alu.dst.sel = ctx->temp_reg;
	alu.dst.write = 1;
	r600_bytecode_src(&alu.src[0], &ctx->src[0], 0);
	alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
	alu.src[1].value = 16;
	alu.last = 1;
	r = r600_bytecode_add_alu(ctx->bc, &alu);
	if (r)
		return r;

	/* dst.wz = dst.xy = f16_to_f32(temp.xy) */
	for (i = 0; i < lasti + 1; i++) {
		if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
			continue;
		memset(&alu, 0, sizeof(struct r600_bytecode_alu));
		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
		alu.op = ALU_OP1_FLT16_TO_FLT32;
		alu.src[0].sel = ctx->temp_reg;
		alu.src[0].chan = i % 2;
		alu.last = i == lasti;
		r = r600_bytecode_add_alu(ctx->bc, &alu);
		if (r)
			return r;
	}

	return 0;
}

/* TGSI opcode dispatch table for R600-class GPUs (pre-Evergreen).
 * Each entry pairs a hardware opcode (when a single op suffices; otherwise
 * NOP) with the emit callback that lowers the TGSI instruction. */
static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = {
	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_r600_arl},
	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},

	/* XXX:
	 * For state trackers other than OpenGL, we'll want to use
	 * _RECIP_IEEE instead.
	 */
	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_CLAMPED, tgsi_trans_srcx_replicate},

	[TGSI_OPCODE_RSQ]	= { ALU_OP0_NOP, tgsi_rsq},
	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL, tgsi_op2},
	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD, tgsi_op3},
	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
	[TGSI_OPCODE_FMA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_DP2A]	= { ALU_OP0_NOP, tgsi_unsupported},
	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
	[TGSI_OPCODE_CLAMP]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
	[33]			= { ALU_OP0_NOP, tgsi_unsupported},
	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DPH]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, tgsi_trig},
	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_r600_arl},
	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
	[TGSI_OPCODE_DDX_FINE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DDY_FINE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PUSHA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_POPA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2_trans},
	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
	[TGSI_OPCODE_SAD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXQ_LZ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
	[TGSI_OPCODE_RESQ]	= { ALU_OP0_NOP, tgsi_unsupported},
	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	[TGSI_OPCODE_MEMBAR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CALLNZ]	= { ALU_OP0_NOP, tgsi_unsupported},
	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BREAKC]	= { ALU_OP0_NOP, tgsi_loop_breakc},
	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},  /* aka HALT */
	[TGSI_OPCODE_DFMA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_op2_trans},
	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2_trans},
	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2_trans},
	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2_swap},
	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_r600_arl},
	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_MFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_LFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUADD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXCHG]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMCAS]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMAND]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMOR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXOR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, tgsi_op2_trans},
	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_unsupported},
	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_unsupported},
	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_unsupported},
	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_unsupported},
	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_unsupported},
	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_unsupported},
	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_unsupported},
	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_unsupported},
	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_unsupported},
	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
};

/* TGSI opcode dispatch table for Evergreen GPUs: adds FMA, half-float
 * pack/unpack, bitfield ops, interpolation ops, and double support. */
static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = {
	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_eg_arl},
	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},
	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_RSQ]	= { ALU_OP1_RECIPSQRT_IEEE, tgsi_rsq},
	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL, tgsi_op2},
	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD, tgsi_op3},
	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
	[TGSI_OPCODE_FMA]	= { ALU_OP3_FMA, tgsi_op3},
	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_DP2A]	= { ALU_OP0_NOP, tgsi_unsupported},
	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
	[TGSI_OPCODE_CLAMP]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, tgsi_pow},
	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
	[33]			= { ALU_OP0_NOP, tgsi_unsupported},
	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DPH]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, tgsi_trig},
	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_pk2h},
	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, tgsi_trig},
	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_up2h},
	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[59]			= { ALU_OP0_NOP, tgsi_unsupported},
	[60]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ARR]	= { ALU_OP0_NOP, tgsi_eg_arl},
	[62]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CAL]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_RET]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SSG]	= { ALU_OP0_NOP, tgsi_ssg},
	[TGSI_OPCODE_CMP]	= { ALU_OP0_NOP, tgsi_cmp},
	[TGSI_OPCODE_SCS]	= { ALU_OP0_NOP, tgsi_scs},
	[TGSI_OPCODE_TXB]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[69]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DIV]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DP2]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_TXL]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_BRK]	= { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont},
	[TGSI_OPCODE_IF]	= { ALU_OP0_NOP, tgsi_if},
	[TGSI_OPCODE_UIF]	= { ALU_OP0_NOP, tgsi_uif},
	[76]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ELSE]	= { ALU_OP0_NOP, tgsi_else},
	[TGSI_OPCODE_ENDIF]	= { ALU_OP0_NOP, tgsi_endif},
	[TGSI_OPCODE_DDX_FINE]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY_FINE]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_PUSHA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_POPA]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CEIL]	= { ALU_OP1_CEIL, tgsi_op2},
	[TGSI_OPCODE_I2F]	= { ALU_OP1_INT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_NOT]	= { ALU_OP1_NOT_INT, tgsi_op2},
	[TGSI_OPCODE_TRUNC]	= { ALU_OP1_TRUNC, tgsi_op2},
	[TGSI_OPCODE_SHL]	= { ALU_OP2_LSHL_INT, tgsi_op2},
	[88]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_AND]	= { ALU_OP2_AND_INT, tgsi_op2},
	[TGSI_OPCODE_OR]	= { ALU_OP2_OR_INT, tgsi_op2},
	[TGSI_OPCODE_MOD]	= { ALU_OP0_NOP, tgsi_imod},
	[TGSI_OPCODE_XOR]	= { ALU_OP2_XOR_INT, tgsi_op2},
	[TGSI_OPCODE_SAD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXF]	= { FETCH_OP_LD, tgsi_tex},
	[TGSI_OPCODE_TXQ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_CONT]	= { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont},
	[TGSI_OPCODE_EMIT]	= { CF_OP_EMIT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_ENDPRIM]	= { CF_OP_CUT_VERTEX, tgsi_gs_emit},
	[TGSI_OPCODE_BGNLOOP]	= { ALU_OP0_NOP, tgsi_bgnloop},
	[TGSI_OPCODE_BGNSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDLOOP]	= { ALU_OP0_NOP, tgsi_endloop},
	[TGSI_OPCODE_ENDSUB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TXQ_LZ]	= { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
	[TGSI_OPCODE_TXQS]	= { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex},
	[TGSI_OPCODE_RESQ]	= { ALU_OP0_NOP, tgsi_unsupported},
	[106]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_NOP]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FSEQ]	= { ALU_OP2_SETE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSGE]	= { ALU_OP2_SETGE_DX10, tgsi_op2},
	[TGSI_OPCODE_FSLT]	= { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
	[TGSI_OPCODE_FSNE]	= { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
	[TGSI_OPCODE_MEMBAR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CALLNZ]	= { ALU_OP0_NOP, tgsi_unsupported},
	[114]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BREAKC]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_KILL_IF]	= { ALU_OP2_KILLGT, tgsi_kill},  /* conditional kill */
	[TGSI_OPCODE_END]	= { ALU_OP0_NOP, tgsi_end},  /* aka HALT */
	/* Refer below for TGSI_OPCODE_DFMA */
	[TGSI_OPCODE_F2I]	= { ALU_OP1_FLT_TO_INT, tgsi_f2i},
	[TGSI_OPCODE_IDIV]	= { ALU_OP0_NOP, tgsi_idiv},
	[TGSI_OPCODE_IMAX]	= { ALU_OP2_MAX_INT, tgsi_op2},
	[TGSI_OPCODE_IMIN]	= { ALU_OP2_MIN_INT, tgsi_op2},
	[TGSI_OPCODE_INEG]	= { ALU_OP2_SUB_INT, tgsi_ineg},
	[TGSI_OPCODE_ISGE]	= { ALU_OP2_SETGE_INT, tgsi_op2},
	[TGSI_OPCODE_ISHR]	= { ALU_OP2_ASHR_INT, tgsi_op2},
	[TGSI_OPCODE_ISLT]	= { ALU_OP2_SETGT_INT, tgsi_op2_swap},
	[TGSI_OPCODE_F2U]	= { ALU_OP1_FLT_TO_UINT, tgsi_f2i},
	[TGSI_OPCODE_U2F]	= { ALU_OP1_UINT_TO_FLT, tgsi_op2_trans},
	[TGSI_OPCODE_UADD]	= { ALU_OP2_ADD_INT, tgsi_op2},
	[TGSI_OPCODE_UDIV]	= { ALU_OP0_NOP, tgsi_udiv},
	[TGSI_OPCODE_UMAD]	= { ALU_OP0_NOP, tgsi_umad},
	[TGSI_OPCODE_UMAX]	= { ALU_OP2_MAX_UINT, tgsi_op2},
	[TGSI_OPCODE_UMIN]	= { ALU_OP2_MIN_UINT, tgsi_op2},
	[TGSI_OPCODE_UMOD]	= { ALU_OP0_NOP, tgsi_umod},
	[TGSI_OPCODE_UMUL]	= { ALU_OP2_MULLO_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_USEQ]	= { ALU_OP2_SETE_INT, tgsi_op2},
	[TGSI_OPCODE_USGE]	= { ALU_OP2_SETGE_UINT, tgsi_op2},
	[TGSI_OPCODE_USHR]	= { ALU_OP2_LSHR_INT, tgsi_op2},
	[TGSI_OPCODE_USLT]	= { ALU_OP2_SETGT_UINT, tgsi_op2_swap},
	[TGSI_OPCODE_USNE]	= { ALU_OP2_SETNE_INT, tgsi_op2},
	[TGSI_OPCODE_SWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_CASE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DEFAULT]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ENDSWITCH]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_I_MS]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_B]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_C_LZ]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_D]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_L]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_GATHER4]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SVIEWINFO]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_POS]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_SAMPLE_INFO]	= { 0, tgsi_unsupported},
	[TGSI_OPCODE_UARL]	= { ALU_OP1_MOVA_INT, tgsi_eg_arl},
	[TGSI_OPCODE_UCMP]	= { ALU_OP0_NOP, tgsi_ucmp},
	[TGSI_OPCODE_IABS]	= { 0, tgsi_iabs},
	[TGSI_OPCODE_ISSG]	= { 0, tgsi_issg},
	[TGSI_OPCODE_LOAD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_STORE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_MFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_LFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SFENCE]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_BARRIER]	= { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
	[TGSI_OPCODE_ATOMUADD]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXCHG]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMCAS]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMAND]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMOR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMXOR]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMUMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMIN]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_ATOMIMAX]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX2]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXB2]	= { FETCH_OP_SAMPLE_LB, tgsi_tex},
	[TGSI_OPCODE_TXL2]	= { FETCH_OP_SAMPLE_L, tgsi_tex},
	[TGSI_OPCODE_IMUL_HI]	= { ALU_OP2_MULHI_INT, tgsi_op2_trans},
	[TGSI_OPCODE_UMUL_HI]	= { ALU_OP2_MULHI_UINT, tgsi_op2_trans},
	[TGSI_OPCODE_TG4]	= { FETCH_OP_GATHER4, tgsi_tex},
	[TGSI_OPCODE_LODQ]	= { FETCH_OP_GET_LOD, tgsi_tex},
	[TGSI_OPCODE_IBFE]	= { ALU_OP3_BFE_INT, tgsi_op3},
	[TGSI_OPCODE_UBFE]	= { ALU_OP3_BFE_UINT, tgsi_op3},
	[TGSI_OPCODE_BFI]	= { ALU_OP0_NOP, tgsi_bfi},
	[TGSI_OPCODE_BREV]	= { ALU_OP1_BFREV_INT, tgsi_op2},
	[TGSI_OPCODE_POPC]	= { ALU_OP1_BCNT_INT, tgsi_op2},
	[TGSI_OPCODE_LSB]	= { ALU_OP1_FFBL_INT, tgsi_op2},
	[TGSI_OPCODE_IMSB]	= { ALU_OP1_FFBH_INT, tgsi_msb},
	[TGSI_OPCODE_UMSB]	= { ALU_OP1_FFBH_UINT, tgsi_msb},
	[TGSI_OPCODE_INTERP_CENTROID]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_INTERP_SAMPLE]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_INTERP_OFFSET]	= { ALU_OP0_NOP, tgsi_interp_egcm},
	[TGSI_OPCODE_F2D]	= { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64},
	[TGSI_OPCODE_D2F]	= { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DABS]	= { ALU_OP1_MOV, tgsi_op2_64},
	[TGSI_OPCODE_DNEG]	= { ALU_OP2_ADD_64, tgsi_dneg},
	[TGSI_OPCODE_DADD]	= { ALU_OP2_ADD_64, tgsi_op2_64},
	[TGSI_OPCODE_DMUL]	= { ALU_OP2_MUL_64, cayman_mul_double_instr},
	[TGSI_OPCODE_DDIV]	= { 0, cayman_ddiv_instr },
	[TGSI_OPCODE_DMAX]	= { ALU_OP2_MAX_64, tgsi_op2_64},
	[TGSI_OPCODE_DMIN]	= { ALU_OP2_MIN_64, tgsi_op2_64},
	[TGSI_OPCODE_DSLT]	= { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
	[TGSI_OPCODE_DSGE]	= { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DSEQ]	= { ALU_OP2_SETE_64, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DSNE]	= { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest},
	[TGSI_OPCODE_DRCP]	= { ALU_OP2_RECIP_64, cayman_emit_double_instr},
	[TGSI_OPCODE_DSQRT]	= { ALU_OP2_SQRT_64, cayman_emit_double_instr},
	[TGSI_OPCODE_DMAD]	= { ALU_OP3_FMA_64, tgsi_op3_64},
	[TGSI_OPCODE_DFMA]	= { ALU_OP3_FMA_64, tgsi_op3_64},
	[TGSI_OPCODE_DFRAC]	= { ALU_OP1_FRACT_64, tgsi_op2_64},
	[TGSI_OPCODE_DLDEXP]	= { ALU_OP2_LDEXP_64, tgsi_op2_64},
	[TGSI_OPCODE_DFRACEXP]	= { ALU_OP1_FREXP_64, tgsi_dfracexp},
	[TGSI_OPCODE_D2I]	= { ALU_OP1_FLT_TO_INT, egcm_double_to_int},
	[TGSI_OPCODE_I2D]	= { ALU_OP1_INT_TO_FLT, egcm_int_to_double},
	[TGSI_OPCODE_D2U]	= { ALU_OP1_FLT_TO_UINT, egcm_double_to_int},
	[TGSI_OPCODE_U2D]	= { ALU_OP1_UINT_TO_FLT, egcm_int_to_double},
	[TGSI_OPCODE_DRSQ]	= { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr},
	[TGSI_OPCODE_LAST]	= { ALU_OP0_NOP, tgsi_unsupported},
};

/* TGSI opcode dispatch table for Cayman: like Evergreen, but transcendental
 * (t-slot) ops use the cayman_* broadcast helpers (see CAYMAN notes at the
 * top of the file). */
static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = {
	[TGSI_OPCODE_ARL]	= { ALU_OP0_NOP, tgsi_eg_arl},
	[TGSI_OPCODE_MOV]	= { ALU_OP1_MOV, tgsi_op2},
	[TGSI_OPCODE_LIT]	= { ALU_OP0_NOP, tgsi_lit},
	[TGSI_OPCODE_RCP]	= { ALU_OP1_RECIP_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_RSQ]	= { ALU_OP1_RECIPSQRT_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_EXP]	= { ALU_OP0_NOP, tgsi_exp},
	[TGSI_OPCODE_LOG]	= { ALU_OP0_NOP, tgsi_log},
	[TGSI_OPCODE_MUL]	= { ALU_OP2_MUL, tgsi_op2},
	[TGSI_OPCODE_ADD]	= { ALU_OP2_ADD, tgsi_op2},
	[TGSI_OPCODE_DP3]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DP4]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_DST]	= { ALU_OP0_NOP, tgsi_opdst},
	[TGSI_OPCODE_MIN]	= { ALU_OP2_MIN, tgsi_op2},
	[TGSI_OPCODE_MAX]	= { ALU_OP2_MAX, tgsi_op2},
	[TGSI_OPCODE_SLT]	= { ALU_OP2_SETGT, tgsi_op2_swap},
	[TGSI_OPCODE_SGE]	= { ALU_OP2_SETGE, tgsi_op2},
	[TGSI_OPCODE_MAD]	= { ALU_OP3_MULADD, tgsi_op3},
	[TGSI_OPCODE_LRP]	= { ALU_OP0_NOP, tgsi_lrp},
	[TGSI_OPCODE_FMA]	= { ALU_OP3_FMA, tgsi_op3},
	[TGSI_OPCODE_SQRT]	= { ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_DP2A]	= { ALU_OP0_NOP, tgsi_unsupported},
	[22]			= { ALU_OP0_NOP, tgsi_unsupported},
	[23]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FRC]	= { ALU_OP1_FRACT, tgsi_op2},
	[TGSI_OPCODE_CLAMP]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_FLR]	= { ALU_OP1_FLOOR, tgsi_op2},
	[TGSI_OPCODE_ROUND]	= { ALU_OP1_RNDNE, tgsi_op2},
	[TGSI_OPCODE_EX2]	= { ALU_OP1_EXP_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_LG2]	= { ALU_OP1_LOG_IEEE, cayman_emit_float_instr},
	[TGSI_OPCODE_POW]	= { ALU_OP0_NOP, cayman_pow},
	[TGSI_OPCODE_XPD]	= { ALU_OP0_NOP, tgsi_xpd},
	[32]			= { ALU_OP0_NOP, tgsi_unsupported},
	[33]			= { ALU_OP0_NOP, tgsi_unsupported},
	[34]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_DPH]	= { ALU_OP2_DOT4, tgsi_dp},
	[TGSI_OPCODE_COS]	= { ALU_OP1_COS, cayman_trig},
	[TGSI_OPCODE_DDX]	= { FETCH_OP_GET_GRADIENTS_H, tgsi_tex},
	[TGSI_OPCODE_DDY]	= { FETCH_OP_GET_GRADIENTS_V, tgsi_tex},
	[TGSI_OPCODE_KILL]	= { ALU_OP2_KILLGT, tgsi_kill},  /* unconditional kill */
	[TGSI_OPCODE_PK2H]	= { ALU_OP0_NOP, tgsi_pk2h},
	[TGSI_OPCODE_PK2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4B]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_PK4UB]	= { ALU_OP0_NOP, tgsi_unsupported},
	[44]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SEQ]	= { ALU_OP2_SETE, tgsi_op2},
	[46]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_SGT]	= { ALU_OP2_SETGT, tgsi_op2},
	[TGSI_OPCODE_SIN]	= { ALU_OP1_SIN, cayman_trig},
	[TGSI_OPCODE_SLE]	= { ALU_OP2_SETGE, tgsi_op2_swap},
	[TGSI_OPCODE_SNE]	= { ALU_OP2_SETNE, tgsi_op2},
	[51]			= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_TEX]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_TXD]	= { FETCH_OP_SAMPLE_G, tgsi_tex},
	[TGSI_OPCODE_TXP]	= { FETCH_OP_SAMPLE, tgsi_tex},
	[TGSI_OPCODE_UP2H]	= { ALU_OP0_NOP, tgsi_up2h},
	[TGSI_OPCODE_UP2US]	= { ALU_OP0_NOP, tgsi_unsupported},
	[TGSI_OPCODE_UP4B]	= { ALU_OP0_NOP,
tgsi_unsupported}, 9540 [TGSI_OPCODE_UP4UB] = { ALU_OP0_NOP, tgsi_unsupported}, 9541 [59] = { ALU_OP0_NOP, tgsi_unsupported}, 9542 [60] = { ALU_OP0_NOP, tgsi_unsupported}, 9543 [TGSI_OPCODE_ARR] = { ALU_OP0_NOP, tgsi_eg_arl}, 9544 [62] = { ALU_OP0_NOP, tgsi_unsupported}, 9545 [TGSI_OPCODE_CAL] = { ALU_OP0_NOP, tgsi_unsupported}, 9546 [TGSI_OPCODE_RET] = { ALU_OP0_NOP, tgsi_unsupported}, 9547 [TGSI_OPCODE_SSG] = { ALU_OP0_NOP, tgsi_ssg}, 9548 [TGSI_OPCODE_CMP] = { ALU_OP0_NOP, tgsi_cmp}, 9549 [TGSI_OPCODE_SCS] = { ALU_OP0_NOP, tgsi_scs}, 9550 [TGSI_OPCODE_TXB] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9551 [69] = { ALU_OP0_NOP, tgsi_unsupported}, 9552 [TGSI_OPCODE_DIV] = { ALU_OP0_NOP, tgsi_unsupported}, 9553 [TGSI_OPCODE_DP2] = { ALU_OP2_DOT4, tgsi_dp}, 9554 [TGSI_OPCODE_TXL] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9555 [TGSI_OPCODE_BRK] = { CF_OP_LOOP_BREAK, tgsi_loop_brk_cont}, 9556 [TGSI_OPCODE_IF] = { ALU_OP0_NOP, tgsi_if}, 9557 [TGSI_OPCODE_UIF] = { ALU_OP0_NOP, tgsi_uif}, 9558 [76] = { ALU_OP0_NOP, tgsi_unsupported}, 9559 [TGSI_OPCODE_ELSE] = { ALU_OP0_NOP, tgsi_else}, 9560 [TGSI_OPCODE_ENDIF] = { ALU_OP0_NOP, tgsi_endif}, 9561 [TGSI_OPCODE_DDX_FINE] = { FETCH_OP_GET_GRADIENTS_H, tgsi_tex}, 9562 [TGSI_OPCODE_DDY_FINE] = { FETCH_OP_GET_GRADIENTS_V, tgsi_tex}, 9563 [TGSI_OPCODE_PUSHA] = { ALU_OP0_NOP, tgsi_unsupported}, 9564 [TGSI_OPCODE_POPA] = { ALU_OP0_NOP, tgsi_unsupported}, 9565 [TGSI_OPCODE_CEIL] = { ALU_OP1_CEIL, tgsi_op2}, 9566 [TGSI_OPCODE_I2F] = { ALU_OP1_INT_TO_FLT, tgsi_op2}, 9567 [TGSI_OPCODE_NOT] = { ALU_OP1_NOT_INT, tgsi_op2}, 9568 [TGSI_OPCODE_TRUNC] = { ALU_OP1_TRUNC, tgsi_op2}, 9569 [TGSI_OPCODE_SHL] = { ALU_OP2_LSHL_INT, tgsi_op2}, 9570 [88] = { ALU_OP0_NOP, tgsi_unsupported}, 9571 [TGSI_OPCODE_AND] = { ALU_OP2_AND_INT, tgsi_op2}, 9572 [TGSI_OPCODE_OR] = { ALU_OP2_OR_INT, tgsi_op2}, 9573 [TGSI_OPCODE_MOD] = { ALU_OP0_NOP, tgsi_imod}, 9574 [TGSI_OPCODE_XOR] = { ALU_OP2_XOR_INT, tgsi_op2}, 9575 [TGSI_OPCODE_SAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9576 
[TGSI_OPCODE_TXF] = { FETCH_OP_LD, tgsi_tex}, 9577 [TGSI_OPCODE_TXQ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9578 [TGSI_OPCODE_CONT] = { CF_OP_LOOP_CONTINUE, tgsi_loop_brk_cont}, 9579 [TGSI_OPCODE_EMIT] = { CF_OP_EMIT_VERTEX, tgsi_gs_emit}, 9580 [TGSI_OPCODE_ENDPRIM] = { CF_OP_CUT_VERTEX, tgsi_gs_emit}, 9581 [TGSI_OPCODE_BGNLOOP] = { ALU_OP0_NOP, tgsi_bgnloop}, 9582 [TGSI_OPCODE_BGNSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9583 [TGSI_OPCODE_ENDLOOP] = { ALU_OP0_NOP, tgsi_endloop}, 9584 [TGSI_OPCODE_ENDSUB] = { ALU_OP0_NOP, tgsi_unsupported}, 9585 [TGSI_OPCODE_TXQ_LZ] = { FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, 9586 [TGSI_OPCODE_TXQS] = { FETCH_OP_GET_NUMBER_OF_SAMPLES, tgsi_tex}, 9587 [TGSI_OPCODE_RESQ] = { ALU_OP0_NOP, tgsi_unsupported}, 9588 [106] = { ALU_OP0_NOP, tgsi_unsupported}, 9589 [TGSI_OPCODE_NOP] = { ALU_OP0_NOP, tgsi_unsupported}, 9590 [TGSI_OPCODE_FSEQ] = { ALU_OP2_SETE_DX10, tgsi_op2}, 9591 [TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2}, 9592 [TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap}, 9593 [TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap}, 9594 [TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported}, 9595 [TGSI_OPCODE_CALLNZ] = { ALU_OP0_NOP, tgsi_unsupported}, 9596 [114] = { ALU_OP0_NOP, tgsi_unsupported}, 9597 [TGSI_OPCODE_BREAKC] = { ALU_OP0_NOP, tgsi_unsupported}, 9598 [TGSI_OPCODE_KILL_IF] = { ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ 9599 [TGSI_OPCODE_END] = { ALU_OP0_NOP, tgsi_end}, /* aka HALT */ 9600 /* Refer below for TGSI_OPCODE_DFMA */ 9601 [TGSI_OPCODE_F2I] = { ALU_OP1_FLT_TO_INT, tgsi_op2}, 9602 [TGSI_OPCODE_IDIV] = { ALU_OP0_NOP, tgsi_idiv}, 9603 [TGSI_OPCODE_IMAX] = { ALU_OP2_MAX_INT, tgsi_op2}, 9604 [TGSI_OPCODE_IMIN] = { ALU_OP2_MIN_INT, tgsi_op2}, 9605 [TGSI_OPCODE_INEG] = { ALU_OP2_SUB_INT, tgsi_ineg}, 9606 [TGSI_OPCODE_ISGE] = { ALU_OP2_SETGE_INT, tgsi_op2}, 9607 [TGSI_OPCODE_ISHR] = { ALU_OP2_ASHR_INT, tgsi_op2}, 9608 [TGSI_OPCODE_ISLT] = { ALU_OP2_SETGT_INT, tgsi_op2_swap}, 9609 
[TGSI_OPCODE_F2U] = { ALU_OP1_FLT_TO_UINT, tgsi_op2}, 9610 [TGSI_OPCODE_U2F] = { ALU_OP1_UINT_TO_FLT, tgsi_op2}, 9611 [TGSI_OPCODE_UADD] = { ALU_OP2_ADD_INT, tgsi_op2}, 9612 [TGSI_OPCODE_UDIV] = { ALU_OP0_NOP, tgsi_udiv}, 9613 [TGSI_OPCODE_UMAD] = { ALU_OP0_NOP, tgsi_umad}, 9614 [TGSI_OPCODE_UMAX] = { ALU_OP2_MAX_UINT, tgsi_op2}, 9615 [TGSI_OPCODE_UMIN] = { ALU_OP2_MIN_UINT, tgsi_op2}, 9616 [TGSI_OPCODE_UMOD] = { ALU_OP0_NOP, tgsi_umod}, 9617 [TGSI_OPCODE_UMUL] = { ALU_OP2_MULLO_INT, cayman_mul_int_instr}, 9618 [TGSI_OPCODE_USEQ] = { ALU_OP2_SETE_INT, tgsi_op2}, 9619 [TGSI_OPCODE_USGE] = { ALU_OP2_SETGE_UINT, tgsi_op2}, 9620 [TGSI_OPCODE_USHR] = { ALU_OP2_LSHR_INT, tgsi_op2}, 9621 [TGSI_OPCODE_USLT] = { ALU_OP2_SETGT_UINT, tgsi_op2_swap}, 9622 [TGSI_OPCODE_USNE] = { ALU_OP2_SETNE_INT, tgsi_op2}, 9623 [TGSI_OPCODE_SWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9624 [TGSI_OPCODE_CASE] = { ALU_OP0_NOP, tgsi_unsupported}, 9625 [TGSI_OPCODE_DEFAULT] = { ALU_OP0_NOP, tgsi_unsupported}, 9626 [TGSI_OPCODE_ENDSWITCH] = { ALU_OP0_NOP, tgsi_unsupported}, 9627 [TGSI_OPCODE_SAMPLE] = { 0, tgsi_unsupported}, 9628 [TGSI_OPCODE_SAMPLE_I] = { 0, tgsi_unsupported}, 9629 [TGSI_OPCODE_SAMPLE_I_MS] = { 0, tgsi_unsupported}, 9630 [TGSI_OPCODE_SAMPLE_B] = { 0, tgsi_unsupported}, 9631 [TGSI_OPCODE_SAMPLE_C] = { 0, tgsi_unsupported}, 9632 [TGSI_OPCODE_SAMPLE_C_LZ] = { 0, tgsi_unsupported}, 9633 [TGSI_OPCODE_SAMPLE_D] = { 0, tgsi_unsupported}, 9634 [TGSI_OPCODE_SAMPLE_L] = { 0, tgsi_unsupported}, 9635 [TGSI_OPCODE_GATHER4] = { 0, tgsi_unsupported}, 9636 [TGSI_OPCODE_SVIEWINFO] = { 0, tgsi_unsupported}, 9637 [TGSI_OPCODE_SAMPLE_POS] = { 0, tgsi_unsupported}, 9638 [TGSI_OPCODE_SAMPLE_INFO] = { 0, tgsi_unsupported}, 9639 [TGSI_OPCODE_UARL] = { ALU_OP1_MOVA_INT, tgsi_eg_arl}, 9640 [TGSI_OPCODE_UCMP] = { ALU_OP0_NOP, tgsi_ucmp}, 9641 [TGSI_OPCODE_IABS] = { 0, tgsi_iabs}, 9642 [TGSI_OPCODE_ISSG] = { 0, tgsi_issg}, 9643 [TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_unsupported}, 9644 [TGSI_OPCODE_STORE] = 
{ ALU_OP0_NOP, tgsi_unsupported}, 9645 [TGSI_OPCODE_MFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9646 [TGSI_OPCODE_LFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9647 [TGSI_OPCODE_SFENCE] = { ALU_OP0_NOP, tgsi_unsupported}, 9648 [TGSI_OPCODE_BARRIER] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier}, 9649 [TGSI_OPCODE_ATOMUADD] = { ALU_OP0_NOP, tgsi_unsupported}, 9650 [TGSI_OPCODE_ATOMXCHG] = { ALU_OP0_NOP, tgsi_unsupported}, 9651 [TGSI_OPCODE_ATOMCAS] = { ALU_OP0_NOP, tgsi_unsupported}, 9652 [TGSI_OPCODE_ATOMAND] = { ALU_OP0_NOP, tgsi_unsupported}, 9653 [TGSI_OPCODE_ATOMOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9654 [TGSI_OPCODE_ATOMXOR] = { ALU_OP0_NOP, tgsi_unsupported}, 9655 [TGSI_OPCODE_ATOMUMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9656 [TGSI_OPCODE_ATOMUMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9657 [TGSI_OPCODE_ATOMIMIN] = { ALU_OP0_NOP, tgsi_unsupported}, 9658 [TGSI_OPCODE_ATOMIMAX] = { ALU_OP0_NOP, tgsi_unsupported}, 9659 [TGSI_OPCODE_TEX2] = { FETCH_OP_SAMPLE, tgsi_tex}, 9660 [TGSI_OPCODE_TXB2] = { FETCH_OP_SAMPLE_LB, tgsi_tex}, 9661 [TGSI_OPCODE_TXL2] = { FETCH_OP_SAMPLE_L, tgsi_tex}, 9662 [TGSI_OPCODE_IMUL_HI] = { ALU_OP2_MULHI_INT, cayman_mul_int_instr}, 9663 [TGSI_OPCODE_UMUL_HI] = { ALU_OP2_MULHI_UINT, cayman_mul_int_instr}, 9664 [TGSI_OPCODE_TG4] = { FETCH_OP_GATHER4, tgsi_tex}, 9665 [TGSI_OPCODE_LODQ] = { FETCH_OP_GET_LOD, tgsi_tex}, 9666 [TGSI_OPCODE_IBFE] = { ALU_OP3_BFE_INT, tgsi_op3}, 9667 [TGSI_OPCODE_UBFE] = { ALU_OP3_BFE_UINT, tgsi_op3}, 9668 [TGSI_OPCODE_BFI] = { ALU_OP0_NOP, tgsi_bfi}, 9669 [TGSI_OPCODE_BREV] = { ALU_OP1_BFREV_INT, tgsi_op2}, 9670 [TGSI_OPCODE_POPC] = { ALU_OP1_BCNT_INT, tgsi_op2}, 9671 [TGSI_OPCODE_LSB] = { ALU_OP1_FFBL_INT, tgsi_op2}, 9672 [TGSI_OPCODE_IMSB] = { ALU_OP1_FFBH_INT, tgsi_msb}, 9673 [TGSI_OPCODE_UMSB] = { ALU_OP1_FFBH_UINT, tgsi_msb}, 9674 [TGSI_OPCODE_INTERP_CENTROID] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9675 [TGSI_OPCODE_INTERP_SAMPLE] = { ALU_OP0_NOP, tgsi_interp_egcm}, 9676 [TGSI_OPCODE_INTERP_OFFSET] = { ALU_OP0_NOP, 
tgsi_interp_egcm}, 9677 [TGSI_OPCODE_F2D] = { ALU_OP1_FLT32_TO_FLT64, tgsi_op2_64}, 9678 [TGSI_OPCODE_D2F] = { ALU_OP1_FLT64_TO_FLT32, tgsi_op2_64_single_dest}, 9679 [TGSI_OPCODE_DABS] = { ALU_OP1_MOV, tgsi_op2_64}, 9680 [TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg}, 9681 [TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64}, 9682 [TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr}, 9683 [TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr }, 9684 [TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64}, 9685 [TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64}, 9686 [TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s}, 9687 [TGSI_OPCODE_DSGE] = { ALU_OP2_SETGE_64, tgsi_op2_64_single_dest}, 9688 [TGSI_OPCODE_DSEQ] = { ALU_OP2_SETE_64, tgsi_op2_64_single_dest}, 9689 [TGSI_OPCODE_DSNE] = { ALU_OP2_SETNE_64, tgsi_op2_64_single_dest}, 9690 [TGSI_OPCODE_DRCP] = { ALU_OP2_RECIP_64, cayman_emit_double_instr}, 9691 [TGSI_OPCODE_DSQRT] = { ALU_OP2_SQRT_64, cayman_emit_double_instr}, 9692 [TGSI_OPCODE_DMAD] = { ALU_OP3_FMA_64, tgsi_op3_64}, 9693 [TGSI_OPCODE_DFMA] = { ALU_OP3_FMA_64, tgsi_op3_64}, 9694 [TGSI_OPCODE_DFRAC] = { ALU_OP1_FRACT_64, tgsi_op2_64}, 9695 [TGSI_OPCODE_DLDEXP] = { ALU_OP2_LDEXP_64, tgsi_op2_64}, 9696 [TGSI_OPCODE_DFRACEXP] = { ALU_OP1_FREXP_64, tgsi_dfracexp}, 9697 [TGSI_OPCODE_D2I] = { ALU_OP1_FLT_TO_INT, egcm_double_to_int}, 9698 [TGSI_OPCODE_I2D] = { ALU_OP1_INT_TO_FLT, egcm_int_to_double}, 9699 [TGSI_OPCODE_D2U] = { ALU_OP1_FLT_TO_UINT, egcm_double_to_int}, 9700 [TGSI_OPCODE_U2D] = { ALU_OP1_UINT_TO_FLT, egcm_int_to_double}, 9701 [TGSI_OPCODE_DRSQ] = { ALU_OP2_RECIPSQRT_64, cayman_emit_double_instr}, 9702 [TGSI_OPCODE_LAST] = { ALU_OP0_NOP, tgsi_unsupported}, 9703 }; 9704