1 /* 2 * Copyright (C) 2016 Mikls Mt 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #include "main/mtypes.h" 24 #include "main/atifragshader.h" 25 #include "main/errors.h" 26 #include "program/prog_parameter.h" 27 28 #include "tgsi/tgsi_ureg.h" 29 #include "tgsi/tgsi_scan.h" 30 #include "tgsi/tgsi_transform.h" 31 32 #include "st_program.h" 33 #include "st_atifs_to_tgsi.h" 34 35 /** 36 * Intermediate state used during shader translation. 37 */ 38 struct st_translate { 39 struct ureg_program *ureg; 40 struct ati_fragment_shader *atifs; 41 42 struct ureg_dst temps[MAX_PROGRAM_TEMPS]; 43 struct ureg_src *constants; 44 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 45 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 46 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 47 48 const GLuint *inputMapping; 49 const GLuint *outputMapping; 50 51 unsigned current_pass; 52 53 bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI]; 54 55 boolean error; 56 }; 57 58 struct instruction_desc { 59 unsigned TGSI_opcode; 60 const char *name; 61 unsigned char arg_count; 62 }; 63 64 static const struct instruction_desc inst_desc[] = { 65 {TGSI_OPCODE_MOV, "MOV", 1}, 66 {TGSI_OPCODE_NOP, "UND", 0}, /* unused */ 67 {TGSI_OPCODE_ADD, "ADD", 2}, 68 {TGSI_OPCODE_MUL, "MUL", 2}, 69 {TGSI_OPCODE_NOP, "SUB", 2}, 70 {TGSI_OPCODE_DP3, "DOT3", 2}, 71 {TGSI_OPCODE_DP4, "DOT4", 2}, 72 {TGSI_OPCODE_MAD, "MAD", 3}, 73 {TGSI_OPCODE_LRP, "LERP", 3}, 74 {TGSI_OPCODE_NOP, "CND", 3}, 75 {TGSI_OPCODE_NOP, "CND0", 3}, 76 {TGSI_OPCODE_NOP, "DOT2_ADD", 3} 77 }; 78 79 static struct ureg_dst 80 get_temp(struct st_translate *t, unsigned index) 81 { 82 if (ureg_dst_is_undef(t->temps[index])) 83 t->temps[index] = ureg_DECL_temporary(t->ureg); 84 return t->temps[index]; 85 } 86 87 static struct ureg_src 88 apply_swizzle(struct st_translate *t, 89 struct ureg_src src, GLuint swizzle) 90 { 91 if (swizzle == GL_SWIZZLE_STR_ATI) { 92 return src; 93 } else if (swizzle == GL_SWIZZLE_STQ_ATI) { 94 return ureg_swizzle(src, 95 TGSI_SWIZZLE_X, 96 TGSI_SWIZZLE_Y, 97 TGSI_SWIZZLE_W, 98 TGSI_SWIZZLE_Z); 99 } else { 100 struct ureg_dst tmp[2]; 101 struct ureg_src imm[3]; 102 103 tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); 104 tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 1); 105 imm[0] = src; 106 imm[1] = ureg_imm4f(t->ureg, 1.0f, 1.0f, 0.0f, 0.0f); 107 imm[2] = ureg_imm4f(t->ureg, 0.0f, 0.0f, 1.0f, 1.0f); 108 ureg_insn(t->ureg, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3); 109 110 if (swizzle == GL_SWIZZLE_STR_DR_ATI) { 111 imm[0] = ureg_scalar(src, TGSI_SWIZZLE_Z); 112 } else { 113 imm[0] = ureg_scalar(src, TGSI_SWIZZLE_W); 114 } 115 ureg_insn(t->ureg, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1); 116 117 imm[0] = ureg_src(tmp[0]); 118 imm[1] = ureg_src(tmp[1]); 119 ureg_insn(t->ureg, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2); 120 121 return ureg_src(tmp[0]); 122 } 123 } 124 125 static struct ureg_src 126 get_source(struct st_translate *t, GLuint src_type) 127 { 128 if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) { 129 if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) { 130 return ureg_src(get_temp(t, src_type - GL_REG_0_ATI)); 131 } else { 132 return ureg_imm1f(t->ureg, 0.0f); 133 } 134 } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) { 135 return t->constants[src_type - GL_CON_0_ATI]; 136 } else if (src_type == GL_ZERO) { 137 return ureg_imm1f(t->ureg, 0.0f); 138 } else if (src_type == GL_ONE) { 139 return ureg_imm1f(t->ureg, 1.0f); 140 } else if (src_type == GL_PRIMARY_COLOR_ARB) { 141 return t->inputs[t->inputMapping[VARYING_SLOT_COL0]]; 142 } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) { 143 return t->inputs[t->inputMapping[VARYING_SLOT_COL1]]; 144 } else { 145 /* frontend prevents this */ 146 unreachable("unknown source"); 147 } 148 } 149 150 static struct ureg_src 151 prepare_argument(struct st_translate *t, const unsigned argId, 152 const struct atifragshader_src_register *srcReg) 153 { 154 struct ureg_src src = get_source(t, srcReg->Index); 155 struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId); 156 157 switch (srcReg->argRep) { 158 case GL_NONE: 159 break; 160 case GL_RED: 161 src = ureg_scalar(src, TGSI_SWIZZLE_X); 162 break; 163 case GL_GREEN: 164 src = ureg_scalar(src, TGSI_SWIZZLE_Y); 165 break; 166 case GL_BLUE: 167 src = ureg_scalar(src, TGSI_SWIZZLE_Z); 168 break; 169 case GL_ALPHA: 170 src = ureg_scalar(src, TGSI_SWIZZLE_W); 171 break; 172 } 173 ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1); 174 175 if (srcReg->argMod & GL_COMP_BIT_ATI) { 176 struct ureg_src modsrc[2]; 177 modsrc[0] = ureg_imm1f(t->ureg, 1.0f); 178 modsrc[1] = ureg_negate(ureg_src(arg)); 179 180 ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); 181 } 182 if (srcReg->argMod & GL_BIAS_BIT_ATI) { 183 struct ureg_src modsrc[2]; 184 modsrc[0] = ureg_src(arg); 185 modsrc[1] = ureg_imm1f(t->ureg, -0.5f); 186 187 ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); 188 } 189 if (srcReg->argMod & GL_2X_BIT_ATI) { 190 struct ureg_src modsrc[2]; 191 modsrc[0] = ureg_src(arg); 192 modsrc[1] = ureg_src(arg); 193 194 ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2); 195 } 196 if (srcReg->argMod & GL_NEGATE_BIT_ATI) { 197 struct ureg_src modsrc[2]; 198 modsrc[0] = ureg_src(arg); 199 modsrc[1] = ureg_imm1f(t->ureg, -1.0f); 200 201 ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2); 202 } 203 return ureg_src(arg); 204 } 205 206 /* These instructions need special treatment */ 207 static void 208 emit_special_inst(struct st_translate *t, const struct instruction_desc *desc, 209 struct ureg_dst *dst, struct ureg_src *args, unsigned argcount) 210 { 211 struct ureg_dst tmp[1]; 212 struct ureg_src src[3]; 213 214 if (!strcmp(desc->name, "SUB")) { 215 ureg_ADD(t->ureg, *dst, args[0], ureg_negate(args[1])); 216 } else if (!strcmp(desc->name, "CND")) { 217 tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */ 218 src[0] = ureg_imm1f(t->ureg, 0.5f); 219 src[1] = ureg_negate(args[2]); 220 ureg_insn(t->ureg, TGSI_OPCODE_ADD, tmp, 1, src, 2); 221 src[0] = ureg_src(tmp[0]); 222 src[1] = args[0]; 223 src[2] = args[1]; 224 ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); 225 } else if (!strcmp(desc->name, "CND0")) { 226 src[0] = args[2]; 227 src[1] = args[1]; 228 src[2] = args[0]; 229 ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3); 230 } else if (!strcmp(desc->name, "DOT2_ADD")) { 231 /* note: DP2A is not implemented in most pipe drivers */ 232 tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */ 233 src[0] = args[0]; 234 src[1] = args[1]; 235 ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2); 236 src[0] = ureg_src(tmp[0]); 237 src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z); 238 ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2); 239 } 240 } 241 242 static void 243 emit_arith_inst(struct st_translate *t, 244 const struct instruction_desc *desc, 245 struct ureg_dst *dst, struct ureg_src *args, unsigned argcount) 246 { 247 if (desc->TGSI_opcode == TGSI_OPCODE_NOP) { 248 return emit_special_inst(t, desc, dst, args, argcount); 249 } 250 251 ureg_insn(t->ureg, desc->TGSI_opcode, dst, 1, args, argcount); 252 } 253 254 static void 255 emit_dstmod(struct st_translate *t, 256 struct ureg_dst dst, GLuint dstMod) 257 { 258 float imm; 259 struct ureg_src src[3]; 260 GLuint scale = dstMod & ~GL_SATURATE_BIT_ATI; 261 262 if (dstMod == GL_NONE) { 263 return; 264 } 265 266 switch (scale) { 267 case GL_2X_BIT_ATI: 268 imm = 2.0f; 269 break; 270 case GL_4X_BIT_ATI: 271 imm = 4.0f; 272 break; 273 case GL_8X_BIT_ATI: 274 imm = 8.0f; 275 break; 276 case GL_HALF_BIT_ATI: 277 imm = 0.5f; 278 break; 279 case GL_QUARTER_BIT_ATI: 280 imm = 0.25f; 281 break; 282 case GL_EIGHTH_BIT_ATI: 283 imm = 0.125f; 284 break; 285 default: 286 imm = 1.0f; 287 } 288 289 src[0] = ureg_src(dst); 290 src[1] = ureg_imm1f(t->ureg, imm); 291 if (dstMod & GL_SATURATE_BIT_ATI) { 292 dst = ureg_saturate(dst); 293 } 294 ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, src, 2); 295 } 296 297 /** 298 * Compile one setup instruction to TGSI instructions. 299 */ 300 static void 301 compile_setupinst(struct st_translate *t, 302 const unsigned r, 303 const struct atifs_setupinst *texinst) 304 { 305 struct ureg_dst dst[1]; 306 struct ureg_src src[2]; 307 308 if (!texinst->Opcode) 309 return; 310 311 dst[0] = get_temp(t, r); 312 313 GLuint pass_tex = texinst->src; 314 315 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { 316 unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0; 317 318 src[0] = t->inputs[t->inputMapping[attr]]; 319 } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) { 320 unsigned reg = pass_tex - GL_REG_0_ATI; 321 322 /* the frontend already validated that REG is only allowed in second pass */ 323 if (t->regs_written[0][reg]) { 324 src[0] = ureg_src(t->temps[reg]); 325 } else { 326 src[0] = ureg_imm1f(t->ureg, 0.0f); 327 } 328 } 329 src[0] = apply_swizzle(t, src[0], texinst->swizzle); 330 331 if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) { 332 /* by default texture and sampler indexes are the same */ 333 src[1] = t->samplers[r]; 334 /* the texture target is still unknown, it will be fixed in the draw call */ 335 ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, TGSI_TEXTURE_2D, 336 NULL, 0, src, 2); 337 } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { 338 ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1); 339 } 340 341 t->regs_written[t->current_pass][r] = true; 342 } 343 344 /** 345 * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions. 346 */ 347 static void 348 compile_instruction(struct st_translate *t, 349 const struct atifs_instruction *inst) 350 { 351 unsigned optype; 352 353 for (optype = 0; optype < 2; optype++) { /* color, alpha */ 354 const struct instruction_desc *desc; 355 struct ureg_dst dst[1]; 356 struct ureg_src args[3]; /* arguments for the main operation */ 357 unsigned arg; 358 unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI; 359 360 if (!inst->Opcode[optype]) 361 continue; 362 363 desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI]; 364 365 /* prepare the arguments */ 366 for (arg = 0; arg < desc->arg_count; arg++) { 367 if (arg >= inst->ArgCount[optype]) { 368 _mesa_warning(0, "Using 0 for missing argument %d of %s\n", 369 arg, desc->name); 370 args[arg] = ureg_imm1f(t->ureg, 0.0f); 371 } else { 372 args[arg] = prepare_argument(t, arg, 373 &inst->SrcReg[optype][arg]); 374 } 375 } 376 377 /* prepare dst */ 378 dst[0] = get_temp(t, dstreg); 379 380 if (optype) { 381 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W); 382 } else { 383 GLuint dstMask = inst->DstReg[optype].dstMask; 384 if (dstMask == GL_NONE) { 385 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ); 386 } else { 387 dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */ 388 } 389 } 390 391 /* emit the main instruction */ 392 emit_arith_inst(t, desc, dst, args, arg); 393 394 emit_dstmod(t, *dst, inst->DstReg[optype].dstMod); 395 396 t->regs_written[t->current_pass][dstreg] = true; 397 } 398 } 399 400 static void 401 finalize_shader(struct st_translate *t, unsigned numPasses) 402 { 403 struct ureg_dst dst[1] = { { 0 } }; 404 struct ureg_src src[1] = { { 0 } }; 405 406 if (t->regs_written[numPasses-1][0]) { 407 /* copy the result into the OUT slot */ 408 dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]]; 409 src[0] = ureg_src(t->temps[0]); 410 ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1); 411 } 412 413 /* signal the end of the program */ 414 ureg_insn(t->ureg, TGSI_OPCODE_END, dst, 0, src, 0); 415 } 416 417 /** 418 * Called when a new variant is needed, we need to translate 419 * the ATI fragment shader to TGSI 420 */ 421 enum pipe_error 422 st_translate_atifs_program( 423 struct ureg_program *ureg, 424 struct ati_fragment_shader *atifs, 425 struct gl_program *program, 426 GLuint numInputs, 427 const GLuint inputMapping[], 428 const ubyte inputSemanticName[], 429 const ubyte inputSemanticIndex[], 430 const GLuint interpMode[], 431 GLuint numOutputs, 432 const GLuint outputMapping[], 433 const ubyte outputSemanticName[], 434 const ubyte outputSemanticIndex[]) 435 { 436 enum pipe_error ret = PIPE_OK; 437 438 unsigned pass, i, r; 439 440 struct st_translate translate, *t; 441 t = &translate; 442 memset(t, 0, sizeof *t); 443 444 t->inputMapping = inputMapping; 445 t->outputMapping = outputMapping; 446 t->ureg = ureg; 447 t->atifs = atifs; 448 449 /* 450 * Declare input attributes. 451 */ 452 for (i = 0; i < numInputs; i++) { 453 t->inputs[i] = ureg_DECL_fs_input(ureg, 454 inputSemanticName[i], 455 inputSemanticIndex[i], 456 interpMode[i]); 457 } 458 459 /* 460 * Declare output attributes: 461 * we always have numOutputs=1 and it's FRAG_RESULT_COLOR 462 */ 463 t->outputs[0] = ureg_DECL_output(ureg, 464 TGSI_SEMANTIC_COLOR, 465 outputSemanticIndex[0]); 466 467 /* Emit constants and immediates. Mesa uses a single index space 468 * for these, so we put all the translated regs in t->constants. 469 */ 470 if (program->Parameters) { 471 t->constants = calloc(program->Parameters->NumParameters, 472 sizeof t->constants[0]); 473 if (t->constants == NULL) { 474 ret = PIPE_ERROR_OUT_OF_MEMORY; 475 goto out; 476 } 477 478 for (i = 0; i < program->Parameters->NumParameters; i++) { 479 switch (program->Parameters->Parameters[i].Type) { 480 case PROGRAM_STATE_VAR: 481 case PROGRAM_UNIFORM: 482 t->constants[i] = ureg_DECL_constant(ureg, i); 483 break; 484 case PROGRAM_CONSTANT: 485 t->constants[i] = 486 ureg_DECL_immediate(ureg, 487 (const float*)program->Parameters->ParameterValues[i], 488 4); 489 break; 490 default: 491 break; 492 } 493 } 494 } 495 496 /* texture samplers */ 497 for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) { 498 if (program->SamplersUsed & (1 << i)) { 499 t->samplers[i] = ureg_DECL_sampler(ureg, i); 500 /* the texture target is still unknown, it will be fixed in the draw call */ 501 ureg_DECL_sampler_view(ureg, i, TGSI_TEXTURE_2D, 502 TGSI_RETURN_TYPE_FLOAT, 503 TGSI_RETURN_TYPE_FLOAT, 504 TGSI_RETURN_TYPE_FLOAT, 505 TGSI_RETURN_TYPE_FLOAT); 506 } 507 } 508 509 /* emit instructions */ 510 for (pass = 0; pass < atifs->NumPasses; pass++) { 511 t->current_pass = pass; 512 for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) { 513 struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r]; 514 compile_setupinst(t, r, texinst); 515 } 516 for (i = 0; i < atifs->numArithInstr[pass]; i++) { 517 struct atifs_instruction *inst = &atifs->Instructions[pass][i]; 518 compile_instruction(t, inst); 519 } 520 } 521 522 finalize_shader(t, atifs->NumPasses); 523 524 out: 525 free(t->constants); 526 527 if (t->error) { 528 debug_printf("%s: translate error flag set\n", __func__); 529 } 530 531 return ret; 532 } 533 534 /** 535 * Called in ProgramStringNotify, we need to fill the metadata of the 536 * gl_program attached to the ati_fragment_shader 537 */ 538 void 539 st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog) 540 { 541 /* we know this is st_fragment_program, because of st_new_ati_fs() */ 542 struct st_fragment_program *stfp = (struct st_fragment_program *) prog; 543 struct ati_fragment_shader *atifs = stfp->ati_fs; 544 545 unsigned pass, i, r, optype, arg; 546 547 static const gl_state_index fog_params_state[STATE_LENGTH] = 548 {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0}; 549 static const gl_state_index fog_color[STATE_LENGTH] = 550 {STATE_FOG_COLOR, 0, 0, 0, 0}; 551 552 prog->info.inputs_read = 0; 553 prog->info.outputs_written = BITFIELD64_BIT(FRAG_RESULT_COLOR); 554 prog->SamplersUsed = 0; 555 prog->Parameters = _mesa_new_parameter_list(); 556 557 /* fill in inputs_read, SamplersUsed, TexturesUsed */ 558 for (pass = 0; pass < atifs->NumPasses; pass++) { 559 for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) { 560 struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r]; 561 GLuint pass_tex = texinst->src; 562 563 if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) { 564 /* mark which texcoords are used */ 565 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB); 566 /* by default there is 1:1 mapping between samplers and textures */ 567 prog->SamplersUsed |= (1 << r); 568 /* the target is unknown here, it will be fixed in the draw call */ 569 prog->TexturesUsed[r] = TEXTURE_2D_BIT; 570 } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) { 571 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) { 572 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB); 573 } 574 } 575 } 576 } 577 for (pass = 0; pass < atifs->NumPasses; pass++) { 578 for (i = 0; i < atifs->numArithInstr[pass]; i++) { 579 struct atifs_instruction *inst = &atifs->Instructions[pass][i]; 580 581 for (optype = 0; optype < 2; optype++) { /* color, alpha */ 582 if (inst->Opcode[optype]) { 583 for (arg = 0; arg < inst->ArgCount[optype]; arg++) { 584 GLint index = inst->SrcReg[optype][arg].Index; 585 if (index == GL_PRIMARY_COLOR_EXT) { 586 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL0); 587 } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) { 588 /* note: ATI_fragment_shader.txt never specifies what 589 * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses 590 * VARYING_SLOT_COL1 for this input */ 591 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL1); 592 } 593 } 594 } 595 } 596 } 597 } 598 /* we may need fog */ 599 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_FOGC); 600 601 /* we always have the ATI_fs constants, and the fog params */ 602 for (i = 0; i < MAX_NUM_FRAGMENT_CONSTANTS_ATI; i++) { 603 _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM, 604 NULL, 4, GL_FLOAT, NULL, NULL); 605 } 606 _mesa_add_state_reference(prog->Parameters, fog_params_state); 607 _mesa_add_state_reference(prog->Parameters, fog_color); 608 609 prog->arb.NumInstructions = 0; 610 prog->arb.NumTemporaries = MAX_NUM_FRAGMENT_REGISTERS_ATI + 3; /* 3 input temps for arith ops */ 611 prog->arb.NumParameters = MAX_NUM_FRAGMENT_CONSTANTS_ATI + 2; /* 2 state variables for fog */ 612 } 613 614 615 struct tgsi_atifs_transform { 616 struct tgsi_transform_context base; 617 struct tgsi_shader_info info; 618 const struct st_fp_variant_key *key; 619 bool first_instruction_emitted; 620 unsigned fog_factor_temp; 621 unsigned fog_clamp_imm; 622 }; 623 624 static inline struct tgsi_atifs_transform * 625 tgsi_atifs_transform(struct tgsi_transform_context *tctx) 626 { 627 return (struct tgsi_atifs_transform *)tctx; 628 } 629 630 /* copied from st_cb_drawpixels_shader.c */ 631 static void 632 set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index, 633 unsigned x, unsigned y, unsigned z, unsigned w) 634 { 635 inst->Src[i].Register.File = file; 636 inst->Src[i].Register.Index = index; 637 inst->Src[i].Register.SwizzleX = x; 638 inst->Src[i].Register.SwizzleY = y; 639 inst->Src[i].Register.SwizzleZ = z; 640 inst->Src[i].Register.SwizzleW = w; 641 } 642 643 #define SET_SRC(inst, i, file, index, x, y, z, w) \ 644 set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \ 645 TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w) 646 647 static void 648 transform_decl(struct tgsi_transform_context *tctx, 649 struct tgsi_full_declaration *decl) 650 { 651 struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx); 652 653 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { 654 /* fix texture target */ 655 unsigned newtarget = ctx->key->texture_targets[decl->Range.First]; 656 if (newtarget) 657 decl->SamplerView.Resource = newtarget; 658 } 659 660 tctx->emit_declaration(tctx, decl); 661 } 662 663 static void 664 transform_instr(struct tgsi_transform_context *tctx, 665 struct tgsi_full_instruction *current_inst) 666 { 667 struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx); 668 669 if (ctx->first_instruction_emitted) 670 goto transform_inst; 671 672 ctx->first_instruction_emitted = true; 673 674 if (ctx->key->fog) { 675 /* add a new temp for the fog factor */ 676 ctx->fog_factor_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1; 677 tgsi_transform_temp_decl(tctx, ctx->fog_factor_temp); 678 679 /* add immediates for clamp */ 680 ctx->fog_clamp_imm = ctx->info.immediate_count; 681 tgsi_transform_immediate_decl(tctx, 1.0f, 0.0f, 0.0f, 0.0f); 682 } 683 684 transform_inst: 685 if (current_inst->Instruction.Opcode == TGSI_OPCODE_TEX) { 686 /* fix texture target */ 687 unsigned newtarget = ctx->key->texture_targets[current_inst->Src[1].Register.Index]; 688 if (newtarget) 689 current_inst->Texture.Texture = newtarget; 690 691 } else if (ctx->key->fog && current_inst->Instruction.Opcode == TGSI_OPCODE_MOV && 692 current_inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) { 693 struct tgsi_full_instruction inst; 694 unsigned i; 695 int fogc_index = -1; 696 int reg0_index = current_inst->Src[0].Register.Index; 697 698 /* find FOGC input */ 699 for (i = 0; i < ctx->info.num_inputs; i++) { 700 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FOG) { 701 fogc_index = i; 702 break; 703 } 704 } 705 if (fogc_index < 0) { 706 /* should never be reached, because fog coord input is always declared */ 707 tctx->emit_instruction(tctx, current_inst); 708 return; 709 } 710 711 /* compute the 1 component fog factor f */ 712 if (ctx->key->fog == 1) { 713 /* LINEAR formula: f = (end - z) / (end - start) 714 * with optimized parameters: 715 * f = MAD(fogcoord, oparams.x, oparams.y) 716 */ 717 inst = tgsi_default_full_instruction(); 718 inst.Instruction.Opcode = TGSI_OPCODE_MAD; 719 inst.Instruction.NumDstRegs = 1; 720 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 721 inst.Dst[0].Register.Index = ctx->fog_factor_temp; 722 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 723 inst.Instruction.NumSrcRegs = 3; 724 SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W); 725 SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, X, X, X, X); 726 SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Y, Y, Y, Y); 727 tctx->emit_instruction(tctx, &inst); 728 } else if (ctx->key->fog == 2) { 729 /* EXP formula: f = exp(-dens * z) 730 * with optimized parameters: 731 * f = MUL(fogcoord, oparams.z); f= EX2(-f) 732 */ 733 inst = tgsi_default_full_instruction(); 734 inst.Instruction.Opcode = TGSI_OPCODE_MUL; 735 inst.Instruction.NumDstRegs = 1; 736 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 737 inst.Dst[0].Register.Index = ctx->fog_factor_temp; 738 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 739 inst.Instruction.NumSrcRegs = 2; 740 SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W); 741 SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Z, Z, Z, Z); 742 tctx->emit_instruction(tctx, &inst); 743 744 inst = tgsi_default_full_instruction(); 745 inst.Instruction.Opcode = TGSI_OPCODE_EX2; 746 inst.Instruction.NumDstRegs = 1; 747 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 748 inst.Dst[0].Register.Index = ctx->fog_factor_temp; 749 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 750 inst.Instruction.NumSrcRegs = 1; 751 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); 752 inst.Src[0].Register.Negate = 1; 753 tctx->emit_instruction(tctx, &inst); 754 } else if (ctx->key->fog == 3) { 755 /* EXP2 formula: f = exp(-(dens * z)^2) 756 * with optimized parameters: 757 * f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f) 758 */ 759 inst = tgsi_default_full_instruction(); 760 inst.Instruction.Opcode = TGSI_OPCODE_MUL; 761 inst.Instruction.NumDstRegs = 1; 762 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 763 inst.Dst[0].Register.Index = ctx->fog_factor_temp; 764 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 765 inst.Instruction.NumSrcRegs = 2; 766 SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W); 767 SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, W, W, W, W); 768 tctx->emit_instruction(tctx, &inst); 769 770 inst = tgsi_default_full_instruction(); 771 inst.Instruction.Opcode = TGSI_OPCODE_MUL; 772 inst.Instruction.NumDstRegs = 1; 773 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 774 inst.Dst[0].Register.Index = ctx->fog_factor_temp; 775 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 776 inst.Instruction.NumSrcRegs = 2; 777 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); 778 SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); 779 tctx->emit_instruction(tctx, &inst); 780 781 inst = tgsi_default_full_instruction(); 782 inst.Instruction.Opcode = TGSI_OPCODE_EX2; 783 inst.Instruction.NumDstRegs = 1; 784 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 785 inst.Dst[0].Register.Index = ctx->fog_factor_temp; 786 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 787 inst.Instruction.NumSrcRegs = 1; 788 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); 789 inst.Src[0].Register.Negate ^= 1; 790 tctx->emit_instruction(tctx, &inst); 791 } 792 /* f = CLAMP(f, 0.0, 1.0) */ 793 inst = tgsi_default_full_instruction(); 794 inst.Instruction.Opcode = TGSI_OPCODE_CLAMP; 795 inst.Instruction.NumDstRegs = 1; 796 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 797 inst.Dst[0].Register.Index = ctx->fog_factor_temp; 798 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 799 inst.Instruction.NumSrcRegs = 3; 800 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W); 801 SET_SRC(&inst, 1, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, Y, Y, Y, Y); // 0.0 802 SET_SRC(&inst, 2, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, X, X, X, X); // 1.0 803 tctx->emit_instruction(tctx, &inst); 804 805 /* REG0 = LRP(f, REG0, fogcolor) */ 806 inst = tgsi_default_full_instruction(); 807 inst.Instruction.Opcode = TGSI_OPCODE_LRP; 808 inst.Instruction.NumDstRegs = 1; 809 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 810 inst.Dst[0].Register.Index = reg0_index; 811 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 812 inst.Instruction.NumSrcRegs = 3; 813 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, X, X, Y); 814 SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, reg0_index, X, Y, Z, W); 815 SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI + 1, X, Y, Z, W); 816 tctx->emit_instruction(tctx, &inst); 817 } 818 819 tctx->emit_instruction(tctx, current_inst); 820 } 821 822 /* 823 * A post-process step in the draw call to fix texture targets and 824 * insert code for fog. 825 */ 826 const struct tgsi_token * 827 st_fixup_atifs(const struct tgsi_token *tokens, 828 const struct st_fp_variant_key *key) 829 { 830 struct tgsi_atifs_transform ctx; 831 struct tgsi_token *newtoks; 832 int newlen; 833 834 memset(&ctx, 0, sizeof(ctx)); 835 ctx.base.transform_declaration = transform_decl; 836 ctx.base.transform_instruction = transform_instr; 837 ctx.key = key; 838 tgsi_scan_shader(tokens, &ctx.info); 839 840 newlen = tgsi_num_tokens(tokens) + 30; 841 newtoks = tgsi_alloc_tokens(newlen); 842 if (!newtoks) 843 return NULL; 844 845 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); 846 return newtoks; 847 } 848 849