1 /* 2 * Copyright (C) 2014 Rob Clark <robclark (at) freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark (at) freedesktop.org> 25 */ 26 27 #include "tgsi/tgsi_transform.h" 28 #include "tgsi/tgsi_scan.h" 29 #include "tgsi/tgsi_dump.h" 30 31 #include "util/u_debug.h" 32 #include "util/u_math.h" 33 34 #include "tgsi_lowering.h" 35 36 struct tgsi_lowering_context { 37 struct tgsi_transform_context base; 38 const struct tgsi_lowering_config *config; 39 struct tgsi_shader_info *info; 40 unsigned two_side_colors; 41 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS]; 42 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */ 43 int face_idx; 44 unsigned numtmp; 45 struct { 46 struct tgsi_full_src_register src; 47 struct tgsi_full_dst_register dst; 48 } tmp[2]; 49 #define A 0 50 #define B 1 51 struct tgsi_full_src_register imm; 52 int emitted_decls; 53 unsigned saturate; 54 }; 55 56 static inline struct tgsi_lowering_context * 57 tgsi_lowering_context(struct tgsi_transform_context *tctx) 58 { 59 return (struct tgsi_lowering_context *)tctx; 60 } 61 62 /* 63 * Utility helpers: 64 */ 65 66 static void 67 reg_dst(struct tgsi_full_dst_register *dst, 68 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) 69 { 70 *dst = *orig_dst; 71 dst->Register.WriteMask &= wrmask; 72 assert(dst->Register.WriteMask); 73 } 74 75 static inline void 76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src) 77 { 78 swiz[0] = src->SwizzleX; 79 swiz[1] = src->SwizzleY; 80 swiz[2] = src->SwizzleZ; 81 swiz[3] = src->SwizzleW; 82 } 83 84 static void 85 reg_src(struct tgsi_full_src_register *src, 86 const struct tgsi_full_src_register *orig_src, 87 unsigned sx, unsigned sy, unsigned sz, unsigned sw) 88 { 89 unsigned swiz[4]; 90 get_swiz(swiz, &orig_src->Register); 91 *src = *orig_src; 92 src->Register.SwizzleX = swiz[sx]; 93 src->Register.SwizzleY = swiz[sy]; 94 src->Register.SwizzleZ = swiz[sz]; 95 src->Register.SwizzleW = swiz[sw]; 96 } 97 98 #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */ 99 #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \ 100 TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w 101 102 /* 103 * if (dst.x aliases src.x) { 104 * MOV tmpA.x, src.x 105 * src = tmpA 106 * } 107 * COS dst.x, src.x 108 * SIN dst.y, src.x 109 * MOV dst.zw, imm{0.0, 1.0} 110 */ 111 static bool 112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask, 113 const struct tgsi_full_src_register *src, unsigned src_mask) 114 { 115 if ((dst->Register.File == src->Register.File) && 116 (dst->Register.Index == src->Register.Index)) { 117 unsigned i, actual_mask = 0; 118 unsigned swiz[4]; 119 get_swiz(swiz, &src->Register); 120 for (i = 0; i < 4; i++) 121 if (src_mask & (1 << i)) 122 actual_mask |= (1 << swiz[i]); 123 if (actual_mask & dst_mask) 124 return true; 125 } 126 return false; 127 } 128 129 static void 130 create_mov(struct tgsi_transform_context *tctx, 131 const struct tgsi_full_dst_register *dst, 132 const struct tgsi_full_src_register *src, 133 unsigned mask, unsigned saturate) 134 { 135 struct tgsi_full_instruction new_inst; 136 137 new_inst = tgsi_default_full_instruction(); 138 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 139 new_inst.Instruction.Saturate = saturate; 140 new_inst.Instruction.NumDstRegs = 1; 141 reg_dst(&new_inst.Dst[0], dst, mask); 142 new_inst.Instruction.NumSrcRegs = 1; 143 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 144 tctx->emit_instruction(tctx, &new_inst); 145 } 146 147 /* to help calculate # of tgsi tokens for a lowering.. we assume 148 * the worst case, ie. removed instructions don't have ADDR[] or 149 * anything which increases the # of tokens per src/dst and the 150 * inserted instructions do. 151 * 152 * OINST() - old instruction 153 * 1 : instruction itself 154 * 1 : dst 155 * 1 * nargs : srcN 156 * 157 * NINST() - new instruction 158 * 1 : instruction itself 159 * 2 : dst 160 * 2 * nargs : srcN 161 */ 162 163 #define OINST(nargs) (1 + 1 + 1 * (nargs)) 164 #define NINST(nargs) (1 + 2 + 2 * (nargs)) 165 166 /* 167 * Lowering Translators: 168 */ 169 170 /* DST - Distance Vector 171 * dst.x = 1.0 172 * dst.y = src0.y \times src1.y 173 * dst.z = src0.z 174 * dst.w = src1.w 175 * 176 * ; note: could be more clever and use just a single temp 177 * ; if I was clever enough to re-write the swizzles. 178 * ; needs: 2 tmp, imm{1.0} 179 * if (dst.y aliases src0.z) { 180 * MOV tmpA.yz, src0.yz 181 * src0 = tmpA 182 * } 183 * if (dst.yz aliases src1.w) { 184 * MOV tmpB.yw, src1.yw 185 * src1 = tmpB 186 * } 187 * MUL dst.y, src0.y, src1.y 188 * MOV dst.z, src0.z 189 * MOV dst.w, src1.w 190 * MOV dst.x, imm{1.0} 191 */ 192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ 193 NINST(1) + NINST(1) - OINST(2)) 194 #define DST_TMP 2 195 static void 196 transform_dst(struct tgsi_transform_context *tctx, 197 struct tgsi_full_instruction *inst) 198 { 199 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 200 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 201 struct tgsi_full_src_register *src0 = &inst->Src[0]; 202 struct tgsi_full_src_register *src1 = &inst->Src[1]; 203 struct tgsi_full_instruction new_inst; 204 205 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) { 206 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0); 207 src0 = &ctx->tmp[A].src; 208 } 209 210 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) { 211 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0); 212 src1 = &ctx->tmp[B].src; 213 } 214 215 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 216 /* MUL dst.y, src0.y, src1.y */ 217 new_inst = tgsi_default_full_instruction(); 218 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 219 new_inst.Instruction.NumDstRegs = 1; 220 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 221 new_inst.Instruction.NumSrcRegs = 2; 222 reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _)); 223 reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _)); 224 tctx->emit_instruction(tctx, &new_inst); 225 } 226 227 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 228 /* MOV dst.z, src0.z */ 229 new_inst = tgsi_default_full_instruction(); 230 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 231 new_inst.Instruction.NumDstRegs = 1; 232 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 233 new_inst.Instruction.NumSrcRegs = 1; 234 reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _)); 235 tctx->emit_instruction(tctx, &new_inst); 236 } 237 238 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 239 /* MOV dst.w, src1.w */ 240 new_inst = tgsi_default_full_instruction(); 241 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 242 new_inst.Instruction.NumDstRegs = 1; 243 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 244 new_inst.Instruction.NumSrcRegs = 1; 245 reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W)); 246 tctx->emit_instruction(tctx, &new_inst); 247 } 248 249 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 250 /* MOV dst.x, imm{1.0} */ 251 new_inst = tgsi_default_full_instruction(); 252 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 253 new_inst.Instruction.NumDstRegs = 1; 254 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 255 new_inst.Instruction.NumSrcRegs = 1; 256 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _)); 257 tctx->emit_instruction(tctx, &new_inst); 258 } 259 } 260 261 /* XPD - Cross Product 262 * dst.x = src0.y \times src1.z - src1.y \times src0.z 263 * dst.y = src0.z \times src1.x - src1.z \times src0.x 264 * dst.z = src0.x \times src1.y - src1.x \times src0.y 265 * dst.w = 1.0 266 * 267 * ; needs: 1 tmp, imm{1.0} 268 * MUL tmpA.xyz, src1.yzx, src0.zxy 269 * MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz 270 * MOV dst.w, imm{1.0} 271 */ 272 #define XPD_GROW (NINST(2) + NINST(3) + NINST(1) - OINST(2)) 273 #define XPD_TMP 1 274 static void 275 transform_xpd(struct tgsi_transform_context *tctx, 276 struct tgsi_full_instruction *inst) 277 { 278 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 279 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 280 struct tgsi_full_src_register *src0 = &inst->Src[0]; 281 struct tgsi_full_src_register *src1 = &inst->Src[1]; 282 struct tgsi_full_instruction new_inst; 283 284 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { 285 /* MUL tmpA.xyz, src1.yzx, src0.zxy */ 286 new_inst = tgsi_default_full_instruction(); 287 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 288 new_inst.Instruction.NumDstRegs = 1; 289 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZ); 290 new_inst.Instruction.NumSrcRegs = 2; 291 reg_src(&new_inst.Src[0], src1, SWIZ(Y, Z, X, _)); 292 reg_src(&new_inst.Src[1], src0, SWIZ(Z, X, Y, _)); 293 tctx->emit_instruction(tctx, &new_inst); 294 295 /* MAD dst.xyz, src0.yzx, src1.zxy, -tmpA.xyz */ 296 new_inst = tgsi_default_full_instruction(); 297 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 298 new_inst.Instruction.NumDstRegs = 1; 299 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZ); 300 new_inst.Instruction.NumSrcRegs = 3; 301 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Z, X, _)); 302 reg_src(&new_inst.Src[1], src1, SWIZ(Z, X, Y, _)); 303 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, _)); 304 new_inst.Src[2].Register.Negate = true; 305 tctx->emit_instruction(tctx, &new_inst); 306 } 307 308 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 309 /* MOV dst.w, imm{1.0} */ 310 new_inst = tgsi_default_full_instruction(); 311 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 312 new_inst.Instruction.NumDstRegs = 1; 313 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 314 new_inst.Instruction.NumSrcRegs = 1; 315 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 316 tctx->emit_instruction(tctx, &new_inst); 317 } 318 } 319 320 /* SCS - Sine Cosine 321 * dst.x = \cos{src.x} 322 * dst.y = \sin{src.x} 323 * dst.z = 0.0 324 * dst.w = 1.0 325 * 326 * ; needs: 1 tmp, imm{0.0, 1.0} 327 * if (dst.x aliases src.x) { 328 * MOV tmpA.x, src.x 329 * src = tmpA 330 * } 331 * COS dst.x, src.x 332 * SIN dst.y, src.x 333 * MOV dst.zw, imm{0.0, 1.0} 334 */ 335 #define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1)) 336 #define SCS_TMP 1 337 static void 338 transform_scs(struct tgsi_transform_context *tctx, 339 struct tgsi_full_instruction *inst) 340 { 341 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 342 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 343 struct tgsi_full_src_register *src = &inst->Src[0]; 344 struct tgsi_full_instruction new_inst; 345 346 if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) { 347 create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0); 348 src = &ctx->tmp[A].src; 349 } 350 351 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 352 /* COS dst.x, src.x */ 353 new_inst = tgsi_default_full_instruction(); 354 new_inst.Instruction.Opcode = TGSI_OPCODE_COS; 355 new_inst.Instruction.NumDstRegs = 1; 356 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 357 new_inst.Instruction.NumSrcRegs = 1; 358 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 359 tctx->emit_instruction(tctx, &new_inst); 360 } 361 362 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 363 /* SIN dst.y, src.x */ 364 new_inst = tgsi_default_full_instruction(); 365 new_inst.Instruction.Opcode = TGSI_OPCODE_SIN; 366 new_inst.Instruction.NumDstRegs = 1; 367 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 368 new_inst.Instruction.NumSrcRegs = 1; 369 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 370 tctx->emit_instruction(tctx, &new_inst); 371 } 372 373 if (dst->Register.WriteMask & TGSI_WRITEMASK_ZW) { 374 /* MOV dst.zw, imm{0.0, 1.0} */ 375 new_inst = tgsi_default_full_instruction(); 376 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 377 new_inst.Instruction.NumDstRegs = 1; 378 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_ZW); 379 new_inst.Instruction.NumSrcRegs = 1; 380 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, X, Y)); 381 tctx->emit_instruction(tctx, &new_inst); 382 } 383 } 384 385 /* LRP - Linear Interpolate 386 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x 387 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y 388 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z 389 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w 390 * 391 * This becomes: src0 \times src1 + src2 - src0 \times src2, which 392 * can then become: src0 \times src1 - (src0 \times src2 - src2) 393 * 394 * ; needs: 1 tmp 395 * MAD tmpA, src0, src2, -src2 396 * MAD dst, src0, src1, -tmpA 397 */ 398 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3)) 399 #define LRP_TMP 1 400 static void 401 transform_lrp(struct tgsi_transform_context *tctx, 402 struct tgsi_full_instruction *inst) 403 { 404 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 405 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 406 struct tgsi_full_src_register *src0 = &inst->Src[0]; 407 struct tgsi_full_src_register *src1 = &inst->Src[1]; 408 struct tgsi_full_src_register *src2 = &inst->Src[2]; 409 struct tgsi_full_instruction new_inst; 410 411 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 412 /* MAD tmpA, src0, src2, -src2 */ 413 new_inst = tgsi_default_full_instruction(); 414 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 415 new_inst.Instruction.NumDstRegs = 1; 416 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 417 new_inst.Instruction.NumSrcRegs = 3; 418 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 419 reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W)); 420 reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W)); 421 new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate; 422 tctx->emit_instruction(tctx, &new_inst); 423 424 /* MAD dst, src0, src1, -tmpA */ 425 new_inst = tgsi_default_full_instruction(); 426 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 427 new_inst.Instruction.NumDstRegs = 1; 428 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 429 new_inst.Instruction.NumSrcRegs = 3; 430 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 431 reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); 432 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 433 new_inst.Src[2].Register.Negate = true; 434 tctx->emit_instruction(tctx, &new_inst); 435 } 436 } 437 438 /* FRC - Fraction 439 * dst.x = src.x - \lfloor src.x\rfloor 440 * dst.y = src.y - \lfloor src.y\rfloor 441 * dst.z = src.z - \lfloor src.z\rfloor 442 * dst.w = src.w - \lfloor src.w\rfloor 443 * 444 * ; needs: 1 tmp 445 * FLR tmpA, src 446 * SUB dst, src, tmpA 447 */ 448 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1)) 449 #define FRC_TMP 1 450 static void 451 transform_frc(struct tgsi_transform_context *tctx, 452 struct tgsi_full_instruction *inst) 453 { 454 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 455 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 456 struct tgsi_full_src_register *src = &inst->Src[0]; 457 struct tgsi_full_instruction new_inst; 458 459 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 460 /* FLR tmpA, src */ 461 new_inst = tgsi_default_full_instruction(); 462 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 463 new_inst.Instruction.NumDstRegs = 1; 464 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 465 new_inst.Instruction.NumSrcRegs = 1; 466 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 467 tctx->emit_instruction(tctx, &new_inst); 468 469 /* SUB dst, src, tmpA */ 470 new_inst = tgsi_default_full_instruction(); 471 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 472 new_inst.Instruction.NumDstRegs = 1; 473 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 474 new_inst.Instruction.NumSrcRegs = 2; 475 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 476 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 477 new_inst.Src[1].Register.Negate = 1; 478 tctx->emit_instruction(tctx, &new_inst); 479 } 480 } 481 482 /* POW - Power 483 * dst.x = src0.x^{src1.x} 484 * dst.y = src0.x^{src1.x} 485 * dst.z = src0.x^{src1.x} 486 * dst.w = src0.x^{src1.x} 487 * 488 * ; needs: 1 tmp 489 * LG2 tmpA.x, src0.x 490 * MUL tmpA.x, src1.x, tmpA.x 491 * EX2 dst, tmpA.x 492 */ 493 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2)) 494 #define POW_TMP 1 495 static void 496 transform_pow(struct tgsi_transform_context *tctx, 497 struct tgsi_full_instruction *inst) 498 { 499 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 500 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 501 struct tgsi_full_src_register *src0 = &inst->Src[0]; 502 struct tgsi_full_src_register *src1 = &inst->Src[1]; 503 struct tgsi_full_instruction new_inst; 504 505 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 506 /* LG2 tmpA.x, src0.x */ 507 new_inst = tgsi_default_full_instruction(); 508 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 509 new_inst.Instruction.NumDstRegs = 1; 510 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 511 new_inst.Instruction.NumSrcRegs = 1; 512 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 513 tctx->emit_instruction(tctx, &new_inst); 514 515 /* MUL tmpA.x, src1.x, tmpA.x */ 516 new_inst = tgsi_default_full_instruction(); 517 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 518 new_inst.Instruction.NumDstRegs = 1; 519 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 520 new_inst.Instruction.NumSrcRegs = 2; 521 reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _)); 522 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 523 tctx->emit_instruction(tctx, &new_inst); 524 525 /* EX2 dst, tmpA.x */ 526 new_inst = tgsi_default_full_instruction(); 527 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 528 new_inst.Instruction.NumDstRegs = 1; 529 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 530 new_inst.Instruction.NumSrcRegs = 1; 531 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 532 tctx->emit_instruction(tctx, &new_inst); 533 } 534 } 535 536 /* LIT - Light Coefficients 537 * dst.x = 1.0 538 * dst.y = max(src.x, 0.0) 539 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0 540 * dst.w = 1.0 541 * 542 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0} 543 * MAX tmpA.xy, src.xy, imm{0.0} 544 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} 545 * LG2 tmpA.y, tmpA.y 546 * MUL tmpA.y, tmpA.z, tmpA.y 547 * EX2 tmpA.y, tmpA.y 548 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0} 549 * MOV dst.yz, tmpA.xy 550 * MOV dst.xw, imm{1.0} 551 */ 552 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \ 553 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1)) 554 #define LIT_TMP 1 555 static void 556 transform_lit(struct tgsi_transform_context *tctx, 557 struct tgsi_full_instruction *inst) 558 { 559 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 560 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 561 struct tgsi_full_src_register *src = &inst->Src[0]; 562 struct tgsi_full_instruction new_inst; 563 564 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) { 565 /* MAX tmpA.xy, src.xy, imm{0.0} */ 566 new_inst = tgsi_default_full_instruction(); 567 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 568 new_inst.Instruction.NumDstRegs = 1; 569 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY); 570 new_inst.Instruction.NumSrcRegs = 2; 571 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _)); 572 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _)); 573 tctx->emit_instruction(tctx, &new_inst); 574 575 /* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} */ 576 new_inst = tgsi_default_full_instruction(); 577 new_inst.Instruction.Opcode = TGSI_OPCODE_CLAMP; 578 new_inst.Instruction.NumDstRegs = 1; 579 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 580 new_inst.Instruction.NumSrcRegs = 3; 581 reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _)); 582 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 583 new_inst.Src[1].Register.Negate = true; 584 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, _, Z, _)); 585 tctx->emit_instruction(tctx, &new_inst); 586 587 /* LG2 tmpA.y, tmpA.y */ 588 new_inst = tgsi_default_full_instruction(); 589 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 590 new_inst.Instruction.NumDstRegs = 1; 591 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 592 new_inst.Instruction.NumSrcRegs = 1; 593 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 594 tctx->emit_instruction(tctx, &new_inst); 595 596 /* MUL tmpA.y, tmpA.z, tmpA.y */ 597 new_inst = tgsi_default_full_instruction(); 598 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 599 new_inst.Instruction.NumDstRegs = 1; 600 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 601 new_inst.Instruction.NumSrcRegs = 2; 602 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 603 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 604 tctx->emit_instruction(tctx, &new_inst); 605 606 /* EX2 tmpA.y, tmpA.y */ 607 new_inst = tgsi_default_full_instruction(); 608 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 609 new_inst.Instruction.NumDstRegs = 1; 610 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 611 new_inst.Instruction.NumSrcRegs = 1; 612 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 613 tctx->emit_instruction(tctx, &new_inst); 614 615 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */ 616 new_inst = tgsi_default_full_instruction(); 617 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 618 new_inst.Instruction.NumDstRegs = 1; 619 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 620 new_inst.Instruction.NumSrcRegs = 3; 621 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 622 new_inst.Src[0].Register.Negate = true; 623 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 624 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _)); 625 tctx->emit_instruction(tctx, &new_inst); 626 627 /* MOV dst.yz, tmpA.xy */ 628 new_inst = tgsi_default_full_instruction(); 629 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 630 new_inst.Instruction.NumDstRegs = 1; 631 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ); 632 new_inst.Instruction.NumSrcRegs = 1; 633 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _)); 634 tctx->emit_instruction(tctx, &new_inst); 635 } 636 637 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) { 638 /* MOV dst.xw, imm{1.0} */ 639 new_inst = tgsi_default_full_instruction(); 640 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 641 new_inst.Instruction.NumDstRegs = 1; 642 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW); 643 new_inst.Instruction.NumSrcRegs = 1; 644 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y)); 645 tctx->emit_instruction(tctx, &new_inst); 646 } 647 } 648 649 /* EXP - Approximate Exponential Base 2 650 * dst.x = 2^{\lfloor src.x\rfloor} 651 * dst.y = src.x - \lfloor src.x\rfloor 652 * dst.z = 2^{src.x} 653 * dst.w = 1.0 654 * 655 * ; needs: 1 tmp, imm{1.0} 656 * if (lowering FLR) { 657 * FRC tmpA.x, src.x 658 * SUB tmpA.x, src.x, tmpA.x 659 * } else { 660 * FLR tmpA.x, src.x 661 * } 662 * EX2 tmpA.y, src.x 663 * SUB dst.y, src.x, tmpA.x 664 * EX2 dst.x, tmpA.x 665 * MOV dst.z, tmpA.y 666 * MOV dst.w, imm{1.0} 667 */ 668 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ 669 NINST(1)+ NINST(1) - OINST(1)) 670 #define EXP_TMP 1 671 static void 672 transform_exp(struct tgsi_transform_context *tctx, 673 struct tgsi_full_instruction *inst) 674 { 675 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 676 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 677 struct tgsi_full_src_register *src = &inst->Src[0]; 678 struct tgsi_full_instruction new_inst; 679 680 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 681 if (ctx->config->lower_FLR) { 682 /* FRC tmpA.x, src.x */ 683 new_inst = tgsi_default_full_instruction(); 684 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 685 new_inst.Instruction.NumDstRegs = 1; 686 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 687 new_inst.Instruction.NumSrcRegs = 1; 688 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 689 tctx->emit_instruction(tctx, &new_inst); 690 691 /* SUB tmpA.x, src.x, tmpA.x */ 692 new_inst = tgsi_default_full_instruction(); 693 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 694 new_inst.Instruction.NumDstRegs = 1; 695 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 696 new_inst.Instruction.NumSrcRegs = 2; 697 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 698 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 699 new_inst.Src[1].Register.Negate = 1; 700 tctx->emit_instruction(tctx, &new_inst); 701 } else { 702 /* FLR tmpA.x, src.x */ 703 new_inst = tgsi_default_full_instruction(); 704 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 705 new_inst.Instruction.NumDstRegs = 1; 706 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 707 new_inst.Instruction.NumSrcRegs = 1; 708 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 709 tctx->emit_instruction(tctx, &new_inst); 710 } 711 } 712 713 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 714 /* EX2 tmpA.y, src.x */ 715 new_inst = tgsi_default_full_instruction(); 716 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 717 new_inst.Instruction.NumDstRegs = 1; 718 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 719 new_inst.Instruction.NumSrcRegs = 1; 720 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 721 tctx->emit_instruction(tctx, &new_inst); 722 } 723 724 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 725 /* SUB dst.y, src.x, tmpA.x */ 726 new_inst = tgsi_default_full_instruction(); 727 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 728 new_inst.Instruction.NumDstRegs = 1; 729 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 730 new_inst.Instruction.NumSrcRegs = 2; 731 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 732 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 733 new_inst.Src[1].Register.Negate = 1; 734 tctx->emit_instruction(tctx, &new_inst); 735 } 736 737 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 738 /* EX2 dst.x, tmpA.x */ 739 new_inst = tgsi_default_full_instruction(); 740 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 741 new_inst.Instruction.NumDstRegs = 1; 742 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 743 new_inst.Instruction.NumSrcRegs = 1; 744 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 745 tctx->emit_instruction(tctx, &new_inst); 746 } 747 748 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 749 /* MOV dst.z, tmpA.y */ 750 new_inst = tgsi_default_full_instruction(); 751 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 752 new_inst.Instruction.NumDstRegs = 1; 753 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 754 new_inst.Instruction.NumSrcRegs = 1; 755 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _)); 756 tctx->emit_instruction(tctx, &new_inst); 757 } 758 759 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 760 /* MOV dst.w, imm{1.0} */ 761 new_inst = tgsi_default_full_instruction(); 762 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 763 new_inst.Instruction.NumDstRegs = 1; 764 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 765 new_inst.Instruction.NumSrcRegs = 1; 766 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 767 tctx->emit_instruction(tctx, &new_inst); 768 } 769 } 770 771 /* LOG - Approximate Logarithm Base 2 772 * dst.x = \lfloor\log_2{|src.x|}\rfloor 773 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} 774 * dst.z = \log_2{|src.x|} 775 * dst.w = 1.0 776 * 777 * ; needs: 1 tmp, imm{1.0} 778 * LG2 tmpA.x, |src.x| 779 * if (lowering FLR) { 780 * FRC tmpA.y, tmpA.x 781 * SUB tmpA.y, tmpA.x, tmpA.y 782 * } else { 783 * FLR tmpA.y, tmpA.x 784 * } 785 * EX2 tmpA.z, tmpA.y 786 * RCP tmpA.z, tmpA.z 787 * MUL dst.y, |src.x|, tmpA.z 788 * MOV dst.xz, tmpA.yx 789 * MOV dst.w, imm{1.0} 790 */ 791 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ 792 NINST(2) + NINST(1) + NINST(1) - OINST(1)) 793 #define LOG_TMP 1 794 static void 795 transform_log(struct tgsi_transform_context *tctx, 796 struct tgsi_full_instruction *inst) 797 { 798 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 799 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 800 struct tgsi_full_src_register *src = &inst->Src[0]; 801 struct tgsi_full_instruction new_inst; 802 803 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { 804 /* LG2 tmpA.x, |src.x| */ 805 new_inst = tgsi_default_full_instruction(); 806 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 807 new_inst.Instruction.NumDstRegs = 1; 808 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 809 new_inst.Instruction.NumSrcRegs = 1; 810 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 811 new_inst.Src[0].Register.Absolute = true; 812 tctx->emit_instruction(tctx, &new_inst); 813 } 814 815 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 816 if (ctx->config->lower_FLR) { 817 /* FRC tmpA.y, tmpA.x */ 818 new_inst = tgsi_default_full_instruction(); 819 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 820 new_inst.Instruction.NumDstRegs = 1; 821 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 822 new_inst.Instruction.NumSrcRegs = 1; 823 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 824 tctx->emit_instruction(tctx, &new_inst); 825 826 /* SUB tmpA.y, tmpA.x, tmpA.y */ 827 new_inst = tgsi_default_full_instruction(); 828 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 829 new_inst.Instruction.NumDstRegs = 1; 830 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 831 new_inst.Instruction.NumSrcRegs = 2; 832 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 833 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 834 new_inst.Src[1].Register.Negate = 1; 835 tctx->emit_instruction(tctx, &new_inst); 836 } else { 837 /* FLR tmpA.y, tmpA.x */ 838 new_inst = tgsi_default_full_instruction(); 839 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 840 new_inst.Instruction.NumDstRegs = 1; 841 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 842 new_inst.Instruction.NumSrcRegs = 1; 843 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 844 tctx->emit_instruction(tctx, &new_inst); 845 } 846 } 847 848 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 849 /* EX2 tmpA.z, tmpA.y */ 850 new_inst = tgsi_default_full_instruction(); 851 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 852 new_inst.Instruction.NumDstRegs = 1; 853 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 854 new_inst.Instruction.NumSrcRegs = 1; 855 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 856 tctx->emit_instruction(tctx, &new_inst); 857 858 /* RCP tmpA.z, tmpA.z */ 859 new_inst = tgsi_default_full_instruction(); 860 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 861 new_inst.Instruction.NumDstRegs = 1; 862 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 863 new_inst.Instruction.NumSrcRegs = 1; 864 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _)); 865 tctx->emit_instruction(tctx, &new_inst); 866 867 /* MUL dst.y, |src.x|, tmpA.z */ 868 new_inst = tgsi_default_full_instruction(); 869 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 870 new_inst.Instruction.NumDstRegs = 1; 871 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 872 new_inst.Instruction.NumSrcRegs = 2; 873 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 874 new_inst.Src[0].Register.Absolute = true; 875 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 876 tctx->emit_instruction(tctx, &new_inst); 877 } 878 879 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) { 880 /* MOV dst.xz, tmpA.yx */ 881 new_inst = tgsi_default_full_instruction(); 882 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 883 new_inst.Instruction.NumDstRegs = 1; 884 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ); 885 new_inst.Instruction.NumSrcRegs = 1; 886 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _)); 887 tctx->emit_instruction(tctx, &new_inst); 888 } 889 890 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 891 /* MOV dst.w, imm{1.0} */ 892 new_inst = tgsi_default_full_instruction(); 893 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 894 new_inst.Instruction.NumDstRegs = 1; 895 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 896 new_inst.Instruction.NumSrcRegs = 1; 897 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 898 tctx->emit_instruction(tctx, &new_inst); 899 } 900 } 901 902 /* DP4 - 4-component Dot Product 903 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w 904 * 905 * DP3 - 3-component Dot Product 906 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z 907 * 908 * DPH - Homogeneous Dot Product 909 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w 910 * 911 * DP2 - 2-component Dot Product 912 * dst = src0.x \times src1.x + src0.y \times src1.y 913 * 914 * DP2A - 2-component Dot Product And Add 915 * dst = src0.x \times src1.x + src0.y \times src1.y + src2.x 916 * 917 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar 918 * operations, which is what you'd prefer for a ISA that is natively 919 * scalar. Probably a native vector ISA would at least already have 920 * DP4/DP3 instructions, but perhaps there is room for an alternative 921 * translation for DPH/DP2/DP2A using vector instructions. 922 * 923 * ; needs: 1 tmp 924 * MUL tmpA.x, src0.x, src1.x 925 * MAD tmpA.x, src0.y, src1.y, tmpA.x 926 * if (DPH || DP3 || DP4) { 927 * MAD tmpA.x, src0.z, src1.z, tmpA.x 928 * if (DPH) { 929 * ADD tmpA.x, src1.w, tmpA.x 930 * } else if (DP4) { 931 * MAD tmpA.x, src0.w, src1.w, tmpA.x 932 * } 933 * } else if (DP2A) { 934 * ADD tmpA.x, src2.x, tmpA.x 935 * } 936 * ; fixup last instruction to replicate into dst 937 */ 938 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2)) 939 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2)) 940 #define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2)) 941 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2)) 942 #define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3)) 943 #define DOTP_TMP 1 944 static void 945 transform_dotp(struct tgsi_transform_context *tctx, 946 struct tgsi_full_instruction *inst) 947 { 948 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 949 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 950 struct tgsi_full_src_register *src0 = &inst->Src[0]; 951 struct tgsi_full_src_register *src1 = &inst->Src[1]; 952 struct tgsi_full_src_register *src2 = &inst->Src[2]; /* only DP2A */ 953 struct tgsi_full_instruction new_inst; 954 unsigned opcode = inst->Instruction.Opcode; 955 956 /* NOTE: any potential last instruction must replicate src on all 957 * components (since it could be re-written to write to final dst) 958 */ 959 960 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 961 /* MUL tmpA.x, src0.x, src1.x */ 962 new_inst = tgsi_default_full_instruction(); 963 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 964 new_inst.Instruction.NumDstRegs = 1; 965 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 966 new_inst.Instruction.NumSrcRegs = 2; 967 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 968 reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _)); 969 tctx->emit_instruction(tctx, &new_inst); 970 971 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */ 972 new_inst = tgsi_default_full_instruction(); 973 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 974 new_inst.Instruction.NumDstRegs = 1; 975 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 976 new_inst.Instruction.NumSrcRegs = 3; 977 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y)); 978 reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y)); 979 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 980 981 if ((opcode == TGSI_OPCODE_DPH) || 982 (opcode == TGSI_OPCODE_DP3) || 983 (opcode == TGSI_OPCODE_DP4)) { 984 tctx->emit_instruction(tctx, &new_inst); 985 986 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */ 987 new_inst = tgsi_default_full_instruction(); 988 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 989 new_inst.Instruction.NumDstRegs = 1; 990 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 991 new_inst.Instruction.NumSrcRegs = 3; 992 reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z)); 993 reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z)); 994 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 995 996 if (opcode == TGSI_OPCODE_DPH) { 997 tctx->emit_instruction(tctx, &new_inst); 998 999 /* ADD tmpA.x, src1.w, tmpA.x */ 1000 new_inst = tgsi_default_full_instruction(); 1001 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 1002 new_inst.Instruction.NumDstRegs = 1; 1003 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 1004 new_inst.Instruction.NumSrcRegs = 2; 1005 reg_src(&new_inst.Src[0], src1, SWIZ(W, W, W, W)); 1006 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 1007 } else if (opcode == TGSI_OPCODE_DP4) { 1008 tctx->emit_instruction(tctx, &new_inst); 1009 1010 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */ 1011 new_inst = tgsi_default_full_instruction(); 1012 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 1013 new_inst.Instruction.NumDstRegs = 1; 1014 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 1015 new_inst.Instruction.NumSrcRegs = 3; 1016 reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W)); 1017 reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W)); 1018 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 1019 } 1020 } else if (opcode == TGSI_OPCODE_DP2A) { 1021 tctx->emit_instruction(tctx, &new_inst); 1022 1023 /* ADD tmpA.x, src2.x, tmpA.x */ 1024 new_inst = tgsi_default_full_instruction(); 1025 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 1026 new_inst.Instruction.NumDstRegs = 1; 1027 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 1028 new_inst.Instruction.NumSrcRegs = 2; 1029 reg_src(&new_inst.Src[0], src2, SWIZ(X, X, X, X)); 1030 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 1031 } 1032 1033 /* fixup last instruction to write to dst: */ 1034 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 1035 1036 tctx->emit_instruction(tctx, &new_inst); 1037 } 1038 } 1039 1040 /* FLR - floor, CEIL - ceil 1041 * ; needs: 1 tmp 1042 * if (CEIL) { 1043 * FRC tmpA, -src 1044 * ADD dst, src, tmpA 1045 * } else { 1046 * FRC tmpA, src 1047 * SUB dst, src, tmpA 1048 * } 1049 */ 1050 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) 1051 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) 1052 #define FLR_TMP 1 1053 #define CEIL_TMP 1 1054 static void 1055 transform_flr_ceil(struct tgsi_transform_context *tctx, 1056 struct tgsi_full_instruction *inst) 1057 { 1058 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1059 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 1060 struct tgsi_full_src_register *src0 = &inst->Src[0]; 1061 struct tgsi_full_instruction new_inst; 1062 unsigned opcode = inst->Instruction.Opcode; 1063 1064 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 1065 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ 1066 new_inst = tgsi_default_full_instruction(); 1067 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 1068 new_inst.Instruction.NumDstRegs = 1; 1069 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 1070 new_inst.Instruction.NumSrcRegs = 1; 1071 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1072 1073 if (opcode == TGSI_OPCODE_CEIL) 1074 new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; 1075 tctx->emit_instruction(tctx, &new_inst); 1076 1077 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ 1078 new_inst = tgsi_default_full_instruction(); 1079 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 1080 new_inst.Instruction.NumDstRegs = 1; 1081 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 1082 new_inst.Instruction.NumSrcRegs = 2; 1083 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1084 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1085 if (opcode == TGSI_OPCODE_FLR) 1086 new_inst.Src[1].Register.Negate = 1; 1087 tctx->emit_instruction(tctx, &new_inst); 1088 } 1089 } 1090 1091 /* TRUNC - truncate off fractional part 1092 * dst.x = trunc(src.x) 1093 * dst.y = trunc(src.y) 1094 * dst.z = trunc(src.z) 1095 * dst.w = trunc(src.w) 1096 * 1097 * ; needs: 1 tmp 1098 * if (lower FLR) { 1099 * FRC tmpA, |src| 1100 * SUB tmpA, |src|, tmpA 1101 * } else { 1102 * FLR tmpA, |src| 1103 * } 1104 * CMP dst, src, -tmpA, tmpA 1105 */ 1106 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1)) 1107 #define TRUNC_TMP 1 1108 static void 1109 transform_trunc(struct tgsi_transform_context *tctx, 1110 struct tgsi_full_instruction *inst) 1111 { 1112 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1113 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 1114 struct tgsi_full_src_register *src0 = &inst->Src[0]; 1115 struct tgsi_full_instruction new_inst; 1116 1117 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 1118 if (ctx->config->lower_FLR) { 1119 new_inst = tgsi_default_full_instruction(); 1120 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 1121 new_inst.Instruction.NumDstRegs = 1; 1122 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 1123 new_inst.Instruction.NumSrcRegs = 1; 1124 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1125 new_inst.Src[0].Register.Absolute = true; 1126 new_inst.Src[0].Register.Negate = false; 1127 tctx->emit_instruction(tctx, &new_inst); 1128 1129 new_inst = tgsi_default_full_instruction(); 1130 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 1131 new_inst.Instruction.NumDstRegs = 1; 1132 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 1133 new_inst.Instruction.NumSrcRegs = 2; 1134 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1135 new_inst.Src[0].Register.Absolute = true; 1136 new_inst.Src[0].Register.Negate = false; 1137 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1138 new_inst.Src[1].Register.Negate = 1; 1139 tctx->emit_instruction(tctx, &new_inst); 1140 } else { 1141 new_inst = tgsi_default_full_instruction(); 1142 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 1143 new_inst.Instruction.NumDstRegs = 1; 1144 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 1145 new_inst.Instruction.NumSrcRegs = 1; 1146 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1147 new_inst.Src[0].Register.Absolute = true; 1148 new_inst.Src[0].Register.Negate = false; 1149 tctx->emit_instruction(tctx, &new_inst); 1150 } 1151 1152 new_inst = tgsi_default_full_instruction(); 1153 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1154 new_inst.Instruction.NumDstRegs = 1; 1155 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 1156 new_inst.Instruction.NumSrcRegs = 3; 1157 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1158 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1159 new_inst.Src[1].Register.Negate = true; 1160 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1161 tctx->emit_instruction(tctx, &new_inst); 1162 } 1163 } 1164 1165 /* Inserts a MOV_SAT for the needed components of tex coord. Note that 1166 * in the case of TXP, the clamping must happen *after* projection, so 1167 * we need to lower TXP to TEX. 1168 * 1169 * MOV tmpA, src0 1170 * if (opc == TXP) { 1171 * ; do perspective division manually before clamping: 1172 * RCP tmpB, tmpA.w 1173 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx 1174 * opc = TEX; 1175 * } 1176 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords 1177 * <opc> dst, tmpA, ... 1178 */ 1179 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1)) 1180 #define SAMP_TMP 2 1181 static int 1182 transform_samp(struct tgsi_transform_context *tctx, 1183 struct tgsi_full_instruction *inst) 1184 { 1185 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1186 struct tgsi_full_src_register *coord = &inst->Src[0]; 1187 struct tgsi_full_src_register *samp; 1188 struct tgsi_full_instruction new_inst; 1189 /* mask is clamped coords, pmask is all coords (for projection): */ 1190 unsigned mask = 0, pmask = 0, smask; 1191 unsigned tex = inst->Texture.Texture; 1192 unsigned opcode = inst->Instruction.Opcode; 1193 bool lower_txp = (opcode == TGSI_OPCODE_TXP) && 1194 (ctx->config->lower_TXP & (1 << tex)); 1195 1196 if (opcode == TGSI_OPCODE_TXB2) { 1197 samp = &inst->Src[2]; 1198 } else { 1199 samp = &inst->Src[1]; 1200 } 1201 1202 /* convert sampler # to bitmask to test: */ 1203 smask = 1 << samp->Register.Index; 1204 1205 /* check if we actually need to lower this one: */ 1206 if (!(ctx->saturate & smask) && !lower_txp) 1207 return -1; 1208 1209 /* figure out which coordinates need saturating: 1210 * - RECT textures should not get saturated 1211 * - array index coords should not get saturated 1212 */ 1213 switch (tex) { 1214 case TGSI_TEXTURE_3D: 1215 case TGSI_TEXTURE_CUBE: 1216 case TGSI_TEXTURE_CUBE_ARRAY: 1217 case TGSI_TEXTURE_SHADOWCUBE: 1218 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 1219 if (ctx->config->saturate_r & smask) 1220 mask |= TGSI_WRITEMASK_Z; 1221 pmask |= TGSI_WRITEMASK_Z; 1222 /* fallthrough */ 1223 1224 case TGSI_TEXTURE_2D: 1225 case TGSI_TEXTURE_2D_ARRAY: 1226 case TGSI_TEXTURE_SHADOW2D: 1227 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1228 case TGSI_TEXTURE_2D_MSAA: 1229 case TGSI_TEXTURE_2D_ARRAY_MSAA: 1230 if (ctx->config->saturate_t & smask) 1231 mask |= TGSI_WRITEMASK_Y; 1232 pmask |= TGSI_WRITEMASK_Y; 1233 /* fallthrough */ 1234 1235 case TGSI_TEXTURE_1D: 1236 case TGSI_TEXTURE_1D_ARRAY: 1237 case TGSI_TEXTURE_SHADOW1D: 1238 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1239 if (ctx->config->saturate_s & smask) 1240 mask |= TGSI_WRITEMASK_X; 1241 pmask |= TGSI_WRITEMASK_X; 1242 break; 1243 1244 case TGSI_TEXTURE_RECT: 1245 case TGSI_TEXTURE_SHADOWRECT: 1246 /* we don't saturate, but in case of lower_txp we 1247 * still need to do the perspective divide: 1248 */ 1249 pmask = TGSI_WRITEMASK_XY; 1250 break; 1251 } 1252 1253 /* sanity check.. driver could be asking to saturate a non- 1254 * existent coordinate component: 1255 */ 1256 if (!mask && !lower_txp) 1257 return -1; 1258 1259 /* MOV tmpA, src0 */ 1260 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0); 1261 1262 /* This is a bit sad.. we need to clamp *after* the coords 1263 * are projected, which means lowering TXP to TEX and doing 1264 * the projection ourself. But since I haven't figured out 1265 * how to make the lowering code deliver an electric shock 1266 * to anyone using GL_CLAMP, we must do this instead: 1267 */ 1268 if (opcode == TGSI_OPCODE_TXP) { 1269 /* RCP tmpB.x tmpA.w */ 1270 new_inst = tgsi_default_full_instruction(); 1271 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 1272 new_inst.Instruction.NumDstRegs = 1; 1273 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 1274 new_inst.Instruction.NumSrcRegs = 1; 1275 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _)); 1276 tctx->emit_instruction(tctx, &new_inst); 1277 1278 /* MUL tmpA.mask, tmpA, tmpB.xxxx */ 1279 new_inst = tgsi_default_full_instruction(); 1280 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 1281 new_inst.Instruction.NumDstRegs = 1; 1282 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask); 1283 new_inst.Instruction.NumSrcRegs = 2; 1284 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1285 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X)); 1286 tctx->emit_instruction(tctx, &new_inst); 1287 1288 opcode = TGSI_OPCODE_TEX; 1289 } 1290 1291 /* MOV_SAT tmpA.<mask>, tmpA */ 1292 if (mask) { 1293 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1); 1294 } 1295 1296 /* modify the texture samp instruction to take fixed up coord: */ 1297 new_inst = *inst; 1298 new_inst.Instruction.Opcode = opcode; 1299 new_inst.Src[0] = ctx->tmp[A].src; 1300 tctx->emit_instruction(tctx, &new_inst); 1301 1302 return 0; 1303 } 1304 1305 /* Two-sided color emulation: 1306 * For each COLOR input, create a corresponding BCOLOR input, plus 1307 * CMP instruction to select front or back color based on FACE 1308 */ 1309 #define TWOSIDE_GROW(n) ( \ 1310 2 + /* FACE */ \ 1311 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\ 1312 ((n) * 1) + /* TEMP[] */ \ 1313 ((n) * NINST(3)) /* CMP instr */ \ 1314 ) 1315 1316 static void 1317 emit_twoside(struct tgsi_transform_context *tctx) 1318 { 1319 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1320 struct tgsi_shader_info *info = ctx->info; 1321 struct tgsi_full_declaration decl; 1322 struct tgsi_full_instruction new_inst; 1323 unsigned inbase, tmpbase; 1324 int i; 1325 1326 inbase = info->file_max[TGSI_FILE_INPUT] + 1; 1327 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1328 1329 /* additional inputs for BCOLOR's */ 1330 for (i = 0; i < ctx->two_side_colors; i++) { 1331 unsigned in_idx = ctx->two_side_idx[i]; 1332 decl = tgsi_default_full_declaration(); 1333 decl.Declaration.File = TGSI_FILE_INPUT; 1334 decl.Declaration.Semantic = true; 1335 decl.Range.First = decl.Range.Last = inbase + i; 1336 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR; 1337 decl.Semantic.Index = info->input_semantic_index[in_idx]; 1338 decl.Declaration.Interpolate = true; 1339 decl.Interp.Interpolate = info->input_interpolate[in_idx]; 1340 decl.Interp.Location = info->input_interpolate_loc[in_idx]; 1341 decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx]; 1342 tctx->emit_declaration(tctx, &decl); 1343 } 1344 1345 /* additional input for FACE */ 1346 if (ctx->two_side_colors && (ctx->face_idx == -1)) { 1347 decl = tgsi_default_full_declaration(); 1348 decl.Declaration.File = TGSI_FILE_INPUT; 1349 decl.Declaration.Semantic = true; 1350 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors; 1351 decl.Semantic.Name = TGSI_SEMANTIC_FACE; 1352 decl.Semantic.Index = 0; 1353 tctx->emit_declaration(tctx, &decl); 1354 1355 ctx->face_idx = decl.Range.First; 1356 } 1357 1358 /* additional temps for COLOR/BCOLOR selection: */ 1359 for (i = 0; i < ctx->two_side_colors; i++) { 1360 decl = tgsi_default_full_declaration(); 1361 decl.Declaration.File = TGSI_FILE_TEMPORARY; 1362 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i; 1363 tctx->emit_declaration(tctx, &decl); 1364 } 1365 1366 /* and finally additional instructions to select COLOR/BCOLOR: */ 1367 for (i = 0; i < ctx->two_side_colors; i++) { 1368 new_inst = tgsi_default_full_instruction(); 1369 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1370 1371 new_inst.Instruction.NumDstRegs = 1; 1372 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 1373 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i; 1374 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 1375 1376 new_inst.Instruction.NumSrcRegs = 3; 1377 new_inst.Src[0].Register.File = TGSI_FILE_INPUT; 1378 new_inst.Src[0].Register.Index = ctx->face_idx; 1379 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; 1380 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; 1381 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; 1382 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; 1383 new_inst.Src[1].Register.File = TGSI_FILE_INPUT; 1384 new_inst.Src[1].Register.Index = inbase + i; 1385 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X; 1386 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y; 1387 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1388 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; 1389 new_inst.Src[2].Register.File = TGSI_FILE_INPUT; 1390 new_inst.Src[2].Register.Index = ctx->two_side_idx[i]; 1391 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X; 1392 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y; 1393 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1394 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; 1395 1396 tctx->emit_instruction(tctx, &new_inst); 1397 } 1398 } 1399 1400 static void 1401 emit_decls(struct tgsi_transform_context *tctx) 1402 { 1403 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1404 struct tgsi_shader_info *info = ctx->info; 1405 struct tgsi_full_declaration decl; 1406 struct tgsi_full_immediate immed; 1407 unsigned tmpbase; 1408 int i; 1409 1410 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1411 1412 ctx->color_base = tmpbase + ctx->numtmp; 1413 1414 /* declare immediate: */ 1415 immed = tgsi_default_full_immediate(); 1416 immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ 1417 immed.u[0].Float = 0.0; 1418 immed.u[1].Float = 1.0; 1419 immed.u[2].Float = 128.0; 1420 immed.u[3].Float = 0.0; 1421 tctx->emit_immediate(tctx, &immed); 1422 1423 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; 1424 ctx->imm.Register.Index = info->immediate_count; 1425 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; 1426 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; 1427 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1428 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; 1429 1430 /* declare temp regs: */ 1431 for (i = 0; i < ctx->numtmp; i++) { 1432 decl = tgsi_default_full_declaration(); 1433 decl.Declaration.File = TGSI_FILE_TEMPORARY; 1434 decl.Range.First = decl.Range.Last = tmpbase + i; 1435 tctx->emit_declaration(tctx, &decl); 1436 1437 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; 1438 ctx->tmp[i].src.Register.Index = tmpbase + i; 1439 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; 1440 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; 1441 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1442 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; 1443 1444 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; 1445 ctx->tmp[i].dst.Register.Index = tmpbase + i; 1446 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1447 } 1448 1449 if (ctx->two_side_colors) 1450 emit_twoside(tctx); 1451 } 1452 1453 static void 1454 rename_color_inputs(struct tgsi_lowering_context *ctx, 1455 struct tgsi_full_instruction *inst) 1456 { 1457 unsigned i, j; 1458 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1459 struct tgsi_src_register *src = &inst->Src[i].Register; 1460 if (src->File == TGSI_FILE_INPUT) { 1461 for (j = 0; j < ctx->two_side_colors; j++) { 1462 if (src->Index == ctx->two_side_idx[j]) { 1463 src->File = TGSI_FILE_TEMPORARY; 1464 src->Index = ctx->color_base + j; 1465 break; 1466 } 1467 } 1468 } 1469 } 1470 1471 } 1472 1473 static void 1474 transform_instr(struct tgsi_transform_context *tctx, 1475 struct tgsi_full_instruction *inst) 1476 { 1477 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1478 1479 if (!ctx->emitted_decls) { 1480 emit_decls(tctx); 1481 ctx->emitted_decls = 1; 1482 } 1483 1484 /* if emulating two-sided-color, we need to re-write some 1485 * src registers: 1486 */ 1487 if (ctx->two_side_colors) 1488 rename_color_inputs(ctx, inst); 1489 1490 switch (inst->Instruction.Opcode) { 1491 case TGSI_OPCODE_DST: 1492 if (!ctx->config->lower_DST) 1493 goto skip; 1494 transform_dst(tctx, inst); 1495 break; 1496 case TGSI_OPCODE_XPD: 1497 if (!ctx->config->lower_XPD) 1498 goto skip; 1499 transform_xpd(tctx, inst); 1500 break; 1501 case TGSI_OPCODE_SCS: 1502 if (!ctx->config->lower_SCS) 1503 goto skip; 1504 transform_scs(tctx, inst); 1505 break; 1506 case TGSI_OPCODE_LRP: 1507 if (!ctx->config->lower_LRP) 1508 goto skip; 1509 transform_lrp(tctx, inst); 1510 break; 1511 case TGSI_OPCODE_FRC: 1512 if (!ctx->config->lower_FRC) 1513 goto skip; 1514 transform_frc(tctx, inst); 1515 break; 1516 case TGSI_OPCODE_POW: 1517 if (!ctx->config->lower_POW) 1518 goto skip; 1519 transform_pow(tctx, inst); 1520 break; 1521 case TGSI_OPCODE_LIT: 1522 if (!ctx->config->lower_LIT) 1523 goto skip; 1524 transform_lit(tctx, inst); 1525 break; 1526 case TGSI_OPCODE_EXP: 1527 if (!ctx->config->lower_EXP) 1528 goto skip; 1529 transform_exp(tctx, inst); 1530 break; 1531 case TGSI_OPCODE_LOG: 1532 if (!ctx->config->lower_LOG) 1533 goto skip; 1534 transform_log(tctx, inst); 1535 break; 1536 case TGSI_OPCODE_DP4: 1537 if (!ctx->config->lower_DP4) 1538 goto skip; 1539 transform_dotp(tctx, inst); 1540 break; 1541 case TGSI_OPCODE_DP3: 1542 if (!ctx->config->lower_DP3) 1543 goto skip; 1544 transform_dotp(tctx, inst); 1545 break; 1546 case TGSI_OPCODE_DPH: 1547 if (!ctx->config->lower_DPH) 1548 goto skip; 1549 transform_dotp(tctx, inst); 1550 break; 1551 case TGSI_OPCODE_DP2: 1552 if (!ctx->config->lower_DP2) 1553 goto skip; 1554 transform_dotp(tctx, inst); 1555 break; 1556 case TGSI_OPCODE_DP2A: 1557 if (!ctx->config->lower_DP2A) 1558 goto skip; 1559 transform_dotp(tctx, inst); 1560 break; 1561 case TGSI_OPCODE_FLR: 1562 if (!ctx->config->lower_FLR) 1563 goto skip; 1564 transform_flr_ceil(tctx, inst); 1565 break; 1566 case TGSI_OPCODE_CEIL: 1567 if (!ctx->config->lower_CEIL) 1568 goto skip; 1569 transform_flr_ceil(tctx, inst); 1570 break; 1571 case TGSI_OPCODE_TRUNC: 1572 if (!ctx->config->lower_TRUNC) 1573 goto skip; 1574 transform_trunc(tctx, inst); 1575 break; 1576 case TGSI_OPCODE_TEX: 1577 case TGSI_OPCODE_TXP: 1578 case TGSI_OPCODE_TXB: 1579 case TGSI_OPCODE_TXB2: 1580 case TGSI_OPCODE_TXL: 1581 if (transform_samp(tctx, inst)) 1582 goto skip; 1583 break; 1584 default: 1585 skip: 1586 tctx->emit_instruction(tctx, inst); 1587 break; 1588 } 1589 } 1590 1591 /* returns NULL if no lowering required, else returns the new 1592 * tokens (which caller is required to free()). In either case 1593 * returns the current info. 1594 */ 1595 const struct tgsi_token * 1596 tgsi_transform_lowering(const struct tgsi_lowering_config *config, 1597 const struct tgsi_token *tokens, 1598 struct tgsi_shader_info *info) 1599 { 1600 struct tgsi_lowering_context ctx; 1601 struct tgsi_token *newtoks; 1602 int newlen, numtmp; 1603 1604 /* sanity check in case limit is ever increased: */ 1605 STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); 1606 1607 /* sanity check the lowering */ 1608 assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); 1609 assert(!(config->lower_FRC && config->lower_TRUNC)); 1610 1611 memset(&ctx, 0, sizeof(ctx)); 1612 ctx.base.transform_instruction = transform_instr; 1613 ctx.info = info; 1614 ctx.config = config; 1615 1616 tgsi_scan_shader(tokens, info); 1617 1618 /* if we are adding fragment shader support to emulate two-sided 1619 * color, then figure out the number of additional inputs we need 1620 * to create for BCOLOR's.. 1621 */ 1622 if ((info->processor == PIPE_SHADER_FRAGMENT) && 1623 config->color_two_side) { 1624 int i; 1625 ctx.face_idx = -1; 1626 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) { 1627 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR) 1628 ctx.two_side_idx[ctx.two_side_colors++] = i; 1629 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE) 1630 ctx.face_idx = i; 1631 } 1632 } 1633 1634 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t; 1635 1636 #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0) 1637 /* if there are no instructions to lower, then we are done: */ 1638 if (!(OPCS(DST) || 1639 OPCS(XPD) || 1640 OPCS(SCS) || 1641 OPCS(LRP) || 1642 OPCS(FRC) || 1643 OPCS(POW) || 1644 OPCS(LIT) || 1645 OPCS(EXP) || 1646 OPCS(LOG) || 1647 OPCS(DP4) || 1648 OPCS(DP3) || 1649 OPCS(DPH) || 1650 OPCS(DP2) || 1651 OPCS(DP2A) || 1652 OPCS(FLR) || 1653 OPCS(CEIL) || 1654 OPCS(TRUNC) || 1655 OPCS(TXP) || 1656 ctx.two_side_colors || 1657 ctx.saturate)) 1658 return NULL; 1659 1660 #if 0 /* debug */ 1661 _debug_printf("BEFORE:"); 1662 tgsi_dump(tokens, 0); 1663 #endif 1664 1665 numtmp = 0; 1666 newlen = tgsi_num_tokens(tokens); 1667 if (OPCS(DST)) { 1668 newlen += DST_GROW * OPCS(DST); 1669 numtmp = MAX2(numtmp, DST_TMP); 1670 } 1671 if (OPCS(XPD)) { 1672 newlen += XPD_GROW * OPCS(XPD); 1673 numtmp = MAX2(numtmp, XPD_TMP); 1674 } 1675 if (OPCS(SCS)) { 1676 newlen += SCS_GROW * OPCS(SCS); 1677 numtmp = MAX2(numtmp, SCS_TMP); 1678 } 1679 if (OPCS(LRP)) { 1680 newlen += LRP_GROW * OPCS(LRP); 1681 numtmp = MAX2(numtmp, LRP_TMP); 1682 } 1683 if (OPCS(FRC)) { 1684 newlen += FRC_GROW * OPCS(FRC); 1685 numtmp = MAX2(numtmp, FRC_TMP); 1686 } 1687 if (OPCS(POW)) { 1688 newlen += POW_GROW * OPCS(POW); 1689 numtmp = MAX2(numtmp, POW_TMP); 1690 } 1691 if (OPCS(LIT)) { 1692 newlen += LIT_GROW * OPCS(LIT); 1693 numtmp = MAX2(numtmp, LIT_TMP); 1694 } 1695 if (OPCS(EXP)) { 1696 newlen += EXP_GROW * OPCS(EXP); 1697 numtmp = MAX2(numtmp, EXP_TMP); 1698 } 1699 if (OPCS(LOG)) { 1700 newlen += LOG_GROW * OPCS(LOG); 1701 numtmp = MAX2(numtmp, LOG_TMP); 1702 } 1703 if (OPCS(DP4)) { 1704 newlen += DP4_GROW * OPCS(DP4); 1705 numtmp = MAX2(numtmp, DOTP_TMP); 1706 } 1707 if (OPCS(DP3)) { 1708 newlen += DP3_GROW * OPCS(DP3); 1709 numtmp = MAX2(numtmp, DOTP_TMP); 1710 } 1711 if (OPCS(DPH)) { 1712 newlen += DPH_GROW * OPCS(DPH); 1713 numtmp = MAX2(numtmp, DOTP_TMP); 1714 } 1715 if (OPCS(DP2)) { 1716 newlen += DP2_GROW * OPCS(DP2); 1717 numtmp = MAX2(numtmp, DOTP_TMP); 1718 } 1719 if (OPCS(DP2A)) { 1720 newlen += DP2A_GROW * OPCS(DP2A); 1721 numtmp = MAX2(numtmp, DOTP_TMP); 1722 } 1723 if (OPCS(FLR)) { 1724 newlen += FLR_GROW * OPCS(FLR); 1725 numtmp = MAX2(numtmp, FLR_TMP); 1726 } 1727 if (OPCS(CEIL)) { 1728 newlen += CEIL_GROW * OPCS(CEIL); 1729 numtmp = MAX2(numtmp, CEIL_TMP); 1730 } 1731 if (OPCS(TRUNC)) { 1732 newlen += TRUNC_GROW * OPCS(TRUNC); 1733 numtmp = MAX2(numtmp, TRUNC_TMP); 1734 } 1735 if (ctx.saturate || config->lower_TXP) { 1736 int n = 0; 1737 1738 if (ctx.saturate) { 1739 n = info->opcode_count[TGSI_OPCODE_TEX] + 1740 info->opcode_count[TGSI_OPCODE_TXP] + 1741 info->opcode_count[TGSI_OPCODE_TXB] + 1742 info->opcode_count[TGSI_OPCODE_TXB2] + 1743 info->opcode_count[TGSI_OPCODE_TXL]; 1744 } else if (config->lower_TXP) { 1745 n = info->opcode_count[TGSI_OPCODE_TXP]; 1746 } 1747 1748 newlen += SAMP_GROW * n; 1749 numtmp = MAX2(numtmp, SAMP_TMP); 1750 } 1751 1752 /* specifically don't include two_side_colors temps in the count: */ 1753 ctx.numtmp = numtmp; 1754 1755 if (ctx.two_side_colors) { 1756 newlen += TWOSIDE_GROW(ctx.two_side_colors); 1757 /* note: we permanently consume temp regs, re-writing references 1758 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP 1759 * instruction that selects which varying to use): 1760 */ 1761 numtmp += ctx.two_side_colors; 1762 } 1763 1764 newlen += 2 * numtmp; 1765 newlen += 5; /* immediate */ 1766 1767 newtoks = tgsi_alloc_tokens(newlen); 1768 if (!newtoks) 1769 return NULL; 1770 1771 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); 1772 1773 tgsi_scan_shader(newtoks, info); 1774 1775 #if 0 /* debug */ 1776 _debug_printf("AFTER:"); 1777 tgsi_dump(newtoks, 0); 1778 #endif 1779 1780 return newtoks; 1781 } 1782