/*
 * Copyright (C) 2014 Rob Clark <robclark (at) freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22 * 23 * Authors: 24 * Rob Clark <robclark (at) freedesktop.org> 25 */ 26 27 #include "tgsi/tgsi_transform.h" 28 #include "tgsi/tgsi_scan.h" 29 #include "tgsi/tgsi_dump.h" 30 31 #include "util/u_debug.h" 32 #include "util/u_math.h" 33 34 #include "tgsi_lowering.h" 35 36 struct tgsi_lowering_context { 37 struct tgsi_transform_context base; 38 const struct tgsi_lowering_config *config; 39 struct tgsi_shader_info *info; 40 unsigned two_side_colors; 41 unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS]; 42 unsigned color_base; /* base register for chosen COLOR/BCOLOR's */ 43 int face_idx; 44 unsigned numtmp; 45 struct { 46 struct tgsi_full_src_register src; 47 struct tgsi_full_dst_register dst; 48 } tmp[2]; 49 #define A 0 50 #define B 1 51 struct tgsi_full_src_register imm; 52 int emitted_decls; 53 unsigned saturate; 54 }; 55 56 static inline struct tgsi_lowering_context * 57 tgsi_lowering_context(struct tgsi_transform_context *tctx) 58 { 59 return (struct tgsi_lowering_context *)tctx; 60 } 61 62 /* 63 * Utility helpers: 64 */ 65 66 static void 67 reg_dst(struct tgsi_full_dst_register *dst, 68 const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) 69 { 70 *dst = *orig_dst; 71 dst->Register.WriteMask &= wrmask; 72 assert(dst->Register.WriteMask); 73 } 74 75 static inline void 76 get_swiz(unsigned *swiz, const struct tgsi_src_register *src) 77 { 78 swiz[0] = src->SwizzleX; 79 swiz[1] = src->SwizzleY; 80 swiz[2] = src->SwizzleZ; 81 swiz[3] = src->SwizzleW; 82 } 83 84 static void 85 reg_src(struct tgsi_full_src_register *src, 86 const struct tgsi_full_src_register *orig_src, 87 unsigned sx, unsigned sy, unsigned sz, unsigned sw) 88 { 89 unsigned swiz[4]; 90 get_swiz(swiz, &orig_src->Register); 91 *src = *orig_src; 92 src->Register.SwizzleX = swiz[sx]; 93 src->Register.SwizzleY = swiz[sy]; 94 src->Register.SwizzleZ = swiz[sz]; 95 src->Register.SwizzleW = swiz[sw]; 96 } 97 98 #define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! 
*/ 99 #define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \ 100 TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w 101 102 /* 103 * if (dst.x aliases src.x) { 104 * MOV tmpA.x, src.x 105 * src = tmpA 106 * } 107 * COS dst.x, src.x 108 * SIN dst.y, src.x 109 * MOV dst.zw, imm{0.0, 1.0} 110 */ 111 static bool 112 aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask, 113 const struct tgsi_full_src_register *src, unsigned src_mask) 114 { 115 if ((dst->Register.File == src->Register.File) && 116 (dst->Register.Index == src->Register.Index)) { 117 unsigned i, actual_mask = 0; 118 unsigned swiz[4]; 119 get_swiz(swiz, &src->Register); 120 for (i = 0; i < 4; i++) 121 if (src_mask & (1 << i)) 122 actual_mask |= (1 << swiz[i]); 123 if (actual_mask & dst_mask) 124 return true; 125 } 126 return false; 127 } 128 129 static void 130 create_mov(struct tgsi_transform_context *tctx, 131 const struct tgsi_full_dst_register *dst, 132 const struct tgsi_full_src_register *src, 133 unsigned mask, unsigned saturate) 134 { 135 struct tgsi_full_instruction new_inst; 136 137 new_inst = tgsi_default_full_instruction(); 138 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 139 new_inst.Instruction.Saturate = saturate; 140 new_inst.Instruction.NumDstRegs = 1; 141 reg_dst(&new_inst.Dst[0], dst, mask); 142 new_inst.Instruction.NumSrcRegs = 1; 143 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 144 tctx->emit_instruction(tctx, &new_inst); 145 } 146 147 /* to help calculate # of tgsi tokens for a lowering.. we assume 148 * the worst case, ie. removed instructions don't have ADDR[] or 149 * anything which increases the # of tokens per src/dst and the 150 * inserted instructions do. 
151 * 152 * OINST() - old instruction 153 * 1 : instruction itself 154 * 1 : dst 155 * 1 * nargs : srcN 156 * 157 * NINST() - new instruction 158 * 1 : instruction itself 159 * 2 : dst 160 * 2 * nargs : srcN 161 */ 162 163 #define OINST(nargs) (1 + 1 + 1 * (nargs)) 164 #define NINST(nargs) (1 + 2 + 2 * (nargs)) 165 166 /* 167 * Lowering Translators: 168 */ 169 170 /* DST - Distance Vector 171 * dst.x = 1.0 172 * dst.y = src0.y \times src1.y 173 * dst.z = src0.z 174 * dst.w = src1.w 175 * 176 * ; note: could be more clever and use just a single temp 177 * ; if I was clever enough to re-write the swizzles. 178 * ; needs: 2 tmp, imm{1.0} 179 * if (dst.y aliases src0.z) { 180 * MOV tmpA.yz, src0.yz 181 * src0 = tmpA 182 * } 183 * if (dst.yz aliases src1.w) { 184 * MOV tmpB.yw, src1.yw 185 * src1 = tmpB 186 * } 187 * MUL dst.y, src0.y, src1.y 188 * MOV dst.z, src0.z 189 * MOV dst.w, src1.w 190 * MOV dst.x, imm{1.0} 191 */ 192 #define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ 193 NINST(1) + NINST(1) - OINST(2)) 194 #define DST_TMP 2 195 static void 196 transform_dst(struct tgsi_transform_context *tctx, 197 struct tgsi_full_instruction *inst) 198 { 199 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 200 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 201 struct tgsi_full_src_register *src0 = &inst->Src[0]; 202 struct tgsi_full_src_register *src1 = &inst->Src[1]; 203 struct tgsi_full_instruction new_inst; 204 205 if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) { 206 create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0); 207 src0 = &ctx->tmp[A].src; 208 } 209 210 if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) { 211 create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0); 212 src1 = &ctx->tmp[B].src; 213 } 214 215 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 216 /* MUL dst.y, src0.y, src1.y */ 217 new_inst = tgsi_default_full_instruction(); 218 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 219 
new_inst.Instruction.NumDstRegs = 1; 220 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 221 new_inst.Instruction.NumSrcRegs = 2; 222 reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _)); 223 reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _)); 224 tctx->emit_instruction(tctx, &new_inst); 225 } 226 227 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 228 /* MOV dst.z, src0.z */ 229 new_inst = tgsi_default_full_instruction(); 230 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 231 new_inst.Instruction.NumDstRegs = 1; 232 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 233 new_inst.Instruction.NumSrcRegs = 1; 234 reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _)); 235 tctx->emit_instruction(tctx, &new_inst); 236 } 237 238 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 239 /* MOV dst.w, src1.w */ 240 new_inst = tgsi_default_full_instruction(); 241 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 242 new_inst.Instruction.NumDstRegs = 1; 243 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 244 new_inst.Instruction.NumSrcRegs = 1; 245 reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W)); 246 tctx->emit_instruction(tctx, &new_inst); 247 } 248 249 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 250 /* MOV dst.x, imm{1.0} */ 251 new_inst = tgsi_default_full_instruction(); 252 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 253 new_inst.Instruction.NumDstRegs = 1; 254 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 255 new_inst.Instruction.NumSrcRegs = 1; 256 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _)); 257 tctx->emit_instruction(tctx, &new_inst); 258 } 259 } 260 261 /* LRP - Linear Interpolate 262 * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x 263 * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y 264 * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z 265 * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w 266 * 267 * This becomes: src0 \times src1 + src2 - src0 \times src2, which 268 * can then become: src0 \times 
src1 - (src0 \times src2 - src2) 269 * 270 * ; needs: 1 tmp 271 * MAD tmpA, src0, src2, -src2 272 * MAD dst, src0, src1, -tmpA 273 */ 274 #define LRP_GROW (NINST(3) + NINST(3) - OINST(3)) 275 #define LRP_TMP 1 276 static void 277 transform_lrp(struct tgsi_transform_context *tctx, 278 struct tgsi_full_instruction *inst) 279 { 280 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 281 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 282 struct tgsi_full_src_register *src0 = &inst->Src[0]; 283 struct tgsi_full_src_register *src1 = &inst->Src[1]; 284 struct tgsi_full_src_register *src2 = &inst->Src[2]; 285 struct tgsi_full_instruction new_inst; 286 287 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 288 /* MAD tmpA, src0, src2, -src2 */ 289 new_inst = tgsi_default_full_instruction(); 290 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 291 new_inst.Instruction.NumDstRegs = 1; 292 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 293 new_inst.Instruction.NumSrcRegs = 3; 294 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 295 reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W)); 296 reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W)); 297 new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate; 298 tctx->emit_instruction(tctx, &new_inst); 299 300 /* MAD dst, src0, src1, -tmpA */ 301 new_inst = tgsi_default_full_instruction(); 302 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 303 new_inst.Instruction.NumDstRegs = 1; 304 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 305 new_inst.Instruction.NumSrcRegs = 3; 306 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 307 reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); 308 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 309 new_inst.Src[2].Register.Negate = true; 310 tctx->emit_instruction(tctx, &new_inst); 311 } 312 } 313 314 /* FRC - Fraction 315 * dst.x = src.x - \lfloor src.x\rfloor 316 * dst.y = src.y - \lfloor src.y\rfloor 317 * dst.z = src.z - \lfloor 
src.z\rfloor 318 * dst.w = src.w - \lfloor src.w\rfloor 319 * 320 * ; needs: 1 tmp 321 * FLR tmpA, src 322 * SUB dst, src, tmpA 323 */ 324 #define FRC_GROW (NINST(1) + NINST(2) - OINST(1)) 325 #define FRC_TMP 1 326 static void 327 transform_frc(struct tgsi_transform_context *tctx, 328 struct tgsi_full_instruction *inst) 329 { 330 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 331 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 332 struct tgsi_full_src_register *src = &inst->Src[0]; 333 struct tgsi_full_instruction new_inst; 334 335 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 336 /* FLR tmpA, src */ 337 new_inst = tgsi_default_full_instruction(); 338 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 339 new_inst.Instruction.NumDstRegs = 1; 340 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 341 new_inst.Instruction.NumSrcRegs = 1; 342 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 343 tctx->emit_instruction(tctx, &new_inst); 344 345 /* SUB dst, src, tmpA */ 346 new_inst = tgsi_default_full_instruction(); 347 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 348 new_inst.Instruction.NumDstRegs = 1; 349 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 350 new_inst.Instruction.NumSrcRegs = 2; 351 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 352 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 353 new_inst.Src[1].Register.Negate = 1; 354 tctx->emit_instruction(tctx, &new_inst); 355 } 356 } 357 358 /* POW - Power 359 * dst.x = src0.x^{src1.x} 360 * dst.y = src0.x^{src1.x} 361 * dst.z = src0.x^{src1.x} 362 * dst.w = src0.x^{src1.x} 363 * 364 * ; needs: 1 tmp 365 * LG2 tmpA.x, src0.x 366 * MUL tmpA.x, src1.x, tmpA.x 367 * EX2 dst, tmpA.x 368 */ 369 #define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2)) 370 #define POW_TMP 1 371 static void 372 transform_pow(struct tgsi_transform_context *tctx, 373 struct tgsi_full_instruction *inst) 374 { 375 struct tgsi_lowering_context *ctx = 
tgsi_lowering_context(tctx); 376 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 377 struct tgsi_full_src_register *src0 = &inst->Src[0]; 378 struct tgsi_full_src_register *src1 = &inst->Src[1]; 379 struct tgsi_full_instruction new_inst; 380 381 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 382 /* LG2 tmpA.x, src0.x */ 383 new_inst = tgsi_default_full_instruction(); 384 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 385 new_inst.Instruction.NumDstRegs = 1; 386 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 387 new_inst.Instruction.NumSrcRegs = 1; 388 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 389 tctx->emit_instruction(tctx, &new_inst); 390 391 /* MUL tmpA.x, src1.x, tmpA.x */ 392 new_inst = tgsi_default_full_instruction(); 393 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 394 new_inst.Instruction.NumDstRegs = 1; 395 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 396 new_inst.Instruction.NumSrcRegs = 2; 397 reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _)); 398 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 399 tctx->emit_instruction(tctx, &new_inst); 400 401 /* EX2 dst, tmpA.x */ 402 new_inst = tgsi_default_full_instruction(); 403 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 404 new_inst.Instruction.NumDstRegs = 1; 405 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 406 new_inst.Instruction.NumSrcRegs = 1; 407 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 408 tctx->emit_instruction(tctx, &new_inst); 409 } 410 } 411 412 /* LIT - Light Coefficients 413 * dst.x = 1.0 414 * dst.y = max(src.x, 0.0) 415 * dst.z = (src.x > 0.0) ? 
max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0 416 * dst.w = 1.0 417 * 418 * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0} 419 * MAX tmpA.xy, src.xy, imm{0.0} 420 * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} 421 * LG2 tmpA.y, tmpA.y 422 * MUL tmpA.y, tmpA.z, tmpA.y 423 * EX2 tmpA.y, tmpA.y 424 * CMP tmpA.y, -src.x, tmpA.y, imm{0.0} 425 * MOV dst.yz, tmpA.xy 426 * MOV dst.xw, imm{1.0} 427 */ 428 #define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \ 429 NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1)) 430 #define LIT_TMP 1 431 static void 432 transform_lit(struct tgsi_transform_context *tctx, 433 struct tgsi_full_instruction *inst) 434 { 435 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 436 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 437 struct tgsi_full_src_register *src = &inst->Src[0]; 438 struct tgsi_full_instruction new_inst; 439 440 if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) { 441 /* MAX tmpA.xy, src.xy, imm{0.0} */ 442 new_inst = tgsi_default_full_instruction(); 443 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 444 new_inst.Instruction.NumDstRegs = 1; 445 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY); 446 new_inst.Instruction.NumSrcRegs = 2; 447 reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _)); 448 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _)); 449 tctx->emit_instruction(tctx, &new_inst); 450 451 /* MIN tmpA.z, src.w, imm{128.0} */ 452 new_inst = tgsi_default_full_instruction(); 453 new_inst.Instruction.Opcode = TGSI_OPCODE_MIN; 454 new_inst.Instruction.NumDstRegs = 1; 455 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 456 new_inst.Instruction.NumSrcRegs = 2; 457 reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _)); 458 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 459 tctx->emit_instruction(tctx, &new_inst); 460 461 /* MAX tmpA.z, tmpA.z, -imm{128.0} */ 462 new_inst = tgsi_default_full_instruction(); 463 new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 464 
new_inst.Instruction.NumDstRegs = 1; 465 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 466 new_inst.Instruction.NumSrcRegs = 2; 467 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _)); 468 reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 469 new_inst.Src[1].Register.Negate = true; 470 tctx->emit_instruction(tctx, &new_inst); 471 472 /* LG2 tmpA.y, tmpA.y */ 473 new_inst = tgsi_default_full_instruction(); 474 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 475 new_inst.Instruction.NumDstRegs = 1; 476 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 477 new_inst.Instruction.NumSrcRegs = 1; 478 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 479 tctx->emit_instruction(tctx, &new_inst); 480 481 /* MUL tmpA.y, tmpA.z, tmpA.y */ 482 new_inst = tgsi_default_full_instruction(); 483 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 484 new_inst.Instruction.NumDstRegs = 1; 485 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 486 new_inst.Instruction.NumSrcRegs = 2; 487 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 488 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 489 tctx->emit_instruction(tctx, &new_inst); 490 491 /* EX2 tmpA.y, tmpA.y */ 492 new_inst = tgsi_default_full_instruction(); 493 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 494 new_inst.Instruction.NumDstRegs = 1; 495 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 496 new_inst.Instruction.NumSrcRegs = 1; 497 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 498 tctx->emit_instruction(tctx, &new_inst); 499 500 /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */ 501 new_inst = tgsi_default_full_instruction(); 502 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 503 new_inst.Instruction.NumDstRegs = 1; 504 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 505 new_inst.Instruction.NumSrcRegs = 3; 506 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 507 
new_inst.Src[0].Register.Negate = true; 508 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 509 reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _)); 510 tctx->emit_instruction(tctx, &new_inst); 511 512 /* MOV dst.yz, tmpA.xy */ 513 new_inst = tgsi_default_full_instruction(); 514 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 515 new_inst.Instruction.NumDstRegs = 1; 516 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ); 517 new_inst.Instruction.NumSrcRegs = 1; 518 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _)); 519 tctx->emit_instruction(tctx, &new_inst); 520 } 521 522 if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) { 523 /* MOV dst.xw, imm{1.0} */ 524 new_inst = tgsi_default_full_instruction(); 525 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 526 new_inst.Instruction.NumDstRegs = 1; 527 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW); 528 new_inst.Instruction.NumSrcRegs = 1; 529 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y)); 530 tctx->emit_instruction(tctx, &new_inst); 531 } 532 } 533 534 /* EXP - Approximate Exponential Base 2 535 * dst.x = 2^{\lfloor src.x\rfloor} 536 * dst.y = src.x - \lfloor src.x\rfloor 537 * dst.z = 2^{src.x} 538 * dst.w = 1.0 539 * 540 * ; needs: 1 tmp, imm{1.0} 541 * if (lowering FLR) { 542 * FRC tmpA.x, src.x 543 * SUB tmpA.x, src.x, tmpA.x 544 * } else { 545 * FLR tmpA.x, src.x 546 * } 547 * EX2 tmpA.y, src.x 548 * SUB dst.y, src.x, tmpA.x 549 * EX2 dst.x, tmpA.x 550 * MOV dst.z, tmpA.y 551 * MOV dst.w, imm{1.0} 552 */ 553 #define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ 554 NINST(1)+ NINST(1) - OINST(1)) 555 #define EXP_TMP 1 556 static void 557 transform_exp(struct tgsi_transform_context *tctx, 558 struct tgsi_full_instruction *inst) 559 { 560 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 561 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 562 struct tgsi_full_src_register *src = &inst->Src[0]; 563 struct tgsi_full_instruction new_inst; 564 
565 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 566 if (ctx->config->lower_FLR) { 567 /* FRC tmpA.x, src.x */ 568 new_inst = tgsi_default_full_instruction(); 569 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 570 new_inst.Instruction.NumDstRegs = 1; 571 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 572 new_inst.Instruction.NumSrcRegs = 1; 573 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 574 tctx->emit_instruction(tctx, &new_inst); 575 576 /* SUB tmpA.x, src.x, tmpA.x */ 577 new_inst = tgsi_default_full_instruction(); 578 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 579 new_inst.Instruction.NumDstRegs = 1; 580 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 581 new_inst.Instruction.NumSrcRegs = 2; 582 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 583 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 584 new_inst.Src[1].Register.Negate = 1; 585 tctx->emit_instruction(tctx, &new_inst); 586 } else { 587 /* FLR tmpA.x, src.x */ 588 new_inst = tgsi_default_full_instruction(); 589 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 590 new_inst.Instruction.NumDstRegs = 1; 591 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 592 new_inst.Instruction.NumSrcRegs = 1; 593 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 594 tctx->emit_instruction(tctx, &new_inst); 595 } 596 } 597 598 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 599 /* EX2 tmpA.y, src.x */ 600 new_inst = tgsi_default_full_instruction(); 601 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 602 new_inst.Instruction.NumDstRegs = 1; 603 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 604 new_inst.Instruction.NumSrcRegs = 1; 605 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 606 tctx->emit_instruction(tctx, &new_inst); 607 } 608 609 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 610 /* SUB dst.y, src.x, tmpA.x */ 611 new_inst = tgsi_default_full_instruction(); 612 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 613 
new_inst.Instruction.NumDstRegs = 1; 614 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 615 new_inst.Instruction.NumSrcRegs = 2; 616 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 617 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 618 new_inst.Src[1].Register.Negate = 1; 619 tctx->emit_instruction(tctx, &new_inst); 620 } 621 622 if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 623 /* EX2 dst.x, tmpA.x */ 624 new_inst = tgsi_default_full_instruction(); 625 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 626 new_inst.Instruction.NumDstRegs = 1; 627 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 628 new_inst.Instruction.NumSrcRegs = 1; 629 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 630 tctx->emit_instruction(tctx, &new_inst); 631 } 632 633 if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 634 /* MOV dst.z, tmpA.y */ 635 new_inst = tgsi_default_full_instruction(); 636 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 637 new_inst.Instruction.NumDstRegs = 1; 638 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 639 new_inst.Instruction.NumSrcRegs = 1; 640 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _)); 641 tctx->emit_instruction(tctx, &new_inst); 642 } 643 644 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 645 /* MOV dst.w, imm{1.0} */ 646 new_inst = tgsi_default_full_instruction(); 647 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 648 new_inst.Instruction.NumDstRegs = 1; 649 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 650 new_inst.Instruction.NumSrcRegs = 1; 651 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 652 tctx->emit_instruction(tctx, &new_inst); 653 } 654 } 655 656 /* LOG - Approximate Logarithm Base 2 657 * dst.x = \lfloor\log_2{|src.x|}\rfloor 658 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} 659 * dst.z = \log_2{|src.x|} 660 * dst.w = 1.0 661 * 662 * ; needs: 1 tmp, imm{1.0} 663 * LG2 tmpA.x, |src.x| 664 * if (lowering FLR) { 665 * FRC tmpA.y, tmpA.x 666 * SUB tmpA.y, 
tmpA.x, tmpA.y 667 * } else { 668 * FLR tmpA.y, tmpA.x 669 * } 670 * EX2 tmpA.z, tmpA.y 671 * RCP tmpA.z, tmpA.z 672 * MUL dst.y, |src.x|, tmpA.z 673 * MOV dst.xz, tmpA.yx 674 * MOV dst.w, imm{1.0} 675 */ 676 #define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ 677 NINST(2) + NINST(1) + NINST(1) - OINST(1)) 678 #define LOG_TMP 1 679 static void 680 transform_log(struct tgsi_transform_context *tctx, 681 struct tgsi_full_instruction *inst) 682 { 683 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 684 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 685 struct tgsi_full_src_register *src = &inst->Src[0]; 686 struct tgsi_full_instruction new_inst; 687 688 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { 689 /* LG2 tmpA.x, |src.x| */ 690 new_inst = tgsi_default_full_instruction(); 691 new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 692 new_inst.Instruction.NumDstRegs = 1; 693 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 694 new_inst.Instruction.NumSrcRegs = 1; 695 reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 696 new_inst.Src[0].Register.Absolute = true; 697 tctx->emit_instruction(tctx, &new_inst); 698 } 699 700 if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 701 if (ctx->config->lower_FLR) { 702 /* FRC tmpA.y, tmpA.x */ 703 new_inst = tgsi_default_full_instruction(); 704 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 705 new_inst.Instruction.NumDstRegs = 1; 706 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 707 new_inst.Instruction.NumSrcRegs = 1; 708 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 709 tctx->emit_instruction(tctx, &new_inst); 710 711 /* SUB tmpA.y, tmpA.x, tmpA.y */ 712 new_inst = tgsi_default_full_instruction(); 713 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 714 new_inst.Instruction.NumDstRegs = 1; 715 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 716 new_inst.Instruction.NumSrcRegs = 2; 717 reg_src(&new_inst.Src[0], 
&ctx->tmp[A].src, SWIZ(_, X, _, _)); 718 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 719 new_inst.Src[1].Register.Negate = 1; 720 tctx->emit_instruction(tctx, &new_inst); 721 } else { 722 /* FLR tmpA.y, tmpA.x */ 723 new_inst = tgsi_default_full_instruction(); 724 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 725 new_inst.Instruction.NumDstRegs = 1; 726 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 727 new_inst.Instruction.NumSrcRegs = 1; 728 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 729 tctx->emit_instruction(tctx, &new_inst); 730 } 731 } 732 733 if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 734 /* EX2 tmpA.z, tmpA.y */ 735 new_inst = tgsi_default_full_instruction(); 736 new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 737 new_inst.Instruction.NumDstRegs = 1; 738 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 739 new_inst.Instruction.NumSrcRegs = 1; 740 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 741 tctx->emit_instruction(tctx, &new_inst); 742 743 /* RCP tmpA.z, tmpA.z */ 744 new_inst = tgsi_default_full_instruction(); 745 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 746 new_inst.Instruction.NumDstRegs = 1; 747 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 748 new_inst.Instruction.NumSrcRegs = 1; 749 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _)); 750 tctx->emit_instruction(tctx, &new_inst); 751 752 /* MUL dst.y, |src.x|, tmpA.z */ 753 new_inst = tgsi_default_full_instruction(); 754 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 755 new_inst.Instruction.NumDstRegs = 1; 756 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 757 new_inst.Instruction.NumSrcRegs = 2; 758 reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 759 new_inst.Src[0].Register.Absolute = true; 760 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 761 tctx->emit_instruction(tctx, &new_inst); 762 } 763 764 if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) { 
765 /* MOV dst.xz, tmpA.yx */ 766 new_inst = tgsi_default_full_instruction(); 767 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 768 new_inst.Instruction.NumDstRegs = 1; 769 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ); 770 new_inst.Instruction.NumSrcRegs = 1; 771 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _)); 772 tctx->emit_instruction(tctx, &new_inst); 773 } 774 775 if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 776 /* MOV dst.w, imm{1.0} */ 777 new_inst = tgsi_default_full_instruction(); 778 new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 779 new_inst.Instruction.NumDstRegs = 1; 780 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 781 new_inst.Instruction.NumSrcRegs = 1; 782 reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 783 tctx->emit_instruction(tctx, &new_inst); 784 } 785 } 786 787 /* DP4 - 4-component Dot Product 788 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w 789 * 790 * DP3 - 3-component Dot Product 791 * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z 792 * 793 * DP2 - 2-component Dot Product 794 * dst = src0.x \times src1.x + src0.y \times src1.y 795 * 796 * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar 797 * operations, which is what you'd prefer for a ISA that is natively 798 * scalar. Probably a native vector ISA would at least already have 799 * DP4/DP3 instructions, but perhaps there is room for an alternative 800 * translation for DP2 using vector instructions. 
801 * 802 * ; needs: 1 tmp 803 * MUL tmpA.x, src0.x, src1.x 804 * MAD tmpA.x, src0.y, src1.y, tmpA.x 805 * if (DP3 || DP4) { 806 * MAD tmpA.x, src0.z, src1.z, tmpA.x 807 * if (DP4) { 808 * MAD tmpA.x, src0.w, src1.w, tmpA.x 809 * } 810 * } 811 * ; fixup last instruction to replicate into dst 812 */ 813 #define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2)) 814 #define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2)) 815 #define DP2_GROW (NINST(2) + NINST(3) - OINST(2)) 816 #define DOTP_TMP 1 817 static void 818 transform_dotp(struct tgsi_transform_context *tctx, 819 struct tgsi_full_instruction *inst) 820 { 821 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 822 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 823 struct tgsi_full_src_register *src0 = &inst->Src[0]; 824 struct tgsi_full_src_register *src1 = &inst->Src[1]; 825 struct tgsi_full_instruction new_inst; 826 unsigned opcode = inst->Instruction.Opcode; 827 828 /* NOTE: any potential last instruction must replicate src on all 829 * components (since it could be re-written to write to final dst) 830 */ 831 832 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 833 /* MUL tmpA.x, src0.x, src1.x */ 834 new_inst = tgsi_default_full_instruction(); 835 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 836 new_inst.Instruction.NumDstRegs = 1; 837 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 838 new_inst.Instruction.NumSrcRegs = 2; 839 reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 840 reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _)); 841 tctx->emit_instruction(tctx, &new_inst); 842 843 /* MAD tmpA.x, src0.y, src1.y, tmpA.x */ 844 new_inst = tgsi_default_full_instruction(); 845 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 846 new_inst.Instruction.NumDstRegs = 1; 847 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 848 new_inst.Instruction.NumSrcRegs = 3; 849 reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y)); 850 reg_src(&new_inst.Src[1], 
src1, SWIZ(Y, Y, Y, Y)); 851 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 852 853 if ((opcode == TGSI_OPCODE_DP3) || 854 (opcode == TGSI_OPCODE_DP4)) { 855 tctx->emit_instruction(tctx, &new_inst); 856 857 /* MAD tmpA.x, src0.z, src1.z, tmpA.x */ 858 new_inst = tgsi_default_full_instruction(); 859 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 860 new_inst.Instruction.NumDstRegs = 1; 861 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 862 new_inst.Instruction.NumSrcRegs = 3; 863 reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z)); 864 reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z)); 865 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 866 867 if (opcode == TGSI_OPCODE_DP4) { 868 tctx->emit_instruction(tctx, &new_inst); 869 870 /* MAD tmpA.x, src0.w, src1.w, tmpA.x */ 871 new_inst = tgsi_default_full_instruction(); 872 new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 873 new_inst.Instruction.NumDstRegs = 1; 874 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 875 new_inst.Instruction.NumSrcRegs = 3; 876 reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W)); 877 reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W)); 878 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 879 } 880 } 881 882 /* fixup last instruction to write to dst: */ 883 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 884 885 tctx->emit_instruction(tctx, &new_inst); 886 } 887 } 888 889 /* FLR - floor, CEIL - ceil 890 * ; needs: 1 tmp 891 * if (CEIL) { 892 * FRC tmpA, -src 893 * ADD dst, src, tmpA 894 * } else { 895 * FRC tmpA, src 896 * SUB dst, src, tmpA 897 * } 898 */ 899 #define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) 900 #define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) 901 #define FLR_TMP 1 902 #define CEIL_TMP 1 903 static void 904 transform_flr_ceil(struct tgsi_transform_context *tctx, 905 struct tgsi_full_instruction *inst) 906 { 907 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 908 struct 
tgsi_full_dst_register *dst = &inst->Dst[0]; 909 struct tgsi_full_src_register *src0 = &inst->Src[0]; 910 struct tgsi_full_instruction new_inst; 911 unsigned opcode = inst->Instruction.Opcode; 912 913 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 914 /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ 915 new_inst = tgsi_default_full_instruction(); 916 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 917 new_inst.Instruction.NumDstRegs = 1; 918 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 919 new_inst.Instruction.NumSrcRegs = 1; 920 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 921 922 if (opcode == TGSI_OPCODE_CEIL) 923 new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; 924 tctx->emit_instruction(tctx, &new_inst); 925 926 /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ 927 new_inst = tgsi_default_full_instruction(); 928 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 929 new_inst.Instruction.NumDstRegs = 1; 930 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 931 new_inst.Instruction.NumSrcRegs = 2; 932 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 933 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 934 if (opcode == TGSI_OPCODE_FLR) 935 new_inst.Src[1].Register.Negate = 1; 936 tctx->emit_instruction(tctx, &new_inst); 937 } 938 } 939 940 /* TRUNC - truncate off fractional part 941 * dst.x = trunc(src.x) 942 * dst.y = trunc(src.y) 943 * dst.z = trunc(src.z) 944 * dst.w = trunc(src.w) 945 * 946 * ; needs: 1 tmp 947 * if (lower FLR) { 948 * FRC tmpA, |src| 949 * SUB tmpA, |src|, tmpA 950 * } else { 951 * FLR tmpA, |src| 952 * } 953 * CMP dst, src, -tmpA, tmpA 954 */ 955 #define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1)) 956 #define TRUNC_TMP 1 957 static void 958 transform_trunc(struct tgsi_transform_context *tctx, 959 struct tgsi_full_instruction *inst) 960 { 961 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 962 struct tgsi_full_dst_register *dst = &inst->Dst[0]; 
963 struct tgsi_full_src_register *src0 = &inst->Src[0]; 964 struct tgsi_full_instruction new_inst; 965 966 if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 967 if (ctx->config->lower_FLR) { 968 new_inst = tgsi_default_full_instruction(); 969 new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 970 new_inst.Instruction.NumDstRegs = 1; 971 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 972 new_inst.Instruction.NumSrcRegs = 1; 973 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 974 new_inst.Src[0].Register.Absolute = true; 975 new_inst.Src[0].Register.Negate = false; 976 tctx->emit_instruction(tctx, &new_inst); 977 978 new_inst = tgsi_default_full_instruction(); 979 new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 980 new_inst.Instruction.NumDstRegs = 1; 981 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 982 new_inst.Instruction.NumSrcRegs = 2; 983 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 984 new_inst.Src[0].Register.Absolute = true; 985 new_inst.Src[0].Register.Negate = false; 986 reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 987 new_inst.Src[1].Register.Negate = 1; 988 tctx->emit_instruction(tctx, &new_inst); 989 } else { 990 new_inst = tgsi_default_full_instruction(); 991 new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 992 new_inst.Instruction.NumDstRegs = 1; 993 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 994 new_inst.Instruction.NumSrcRegs = 1; 995 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 996 new_inst.Src[0].Register.Absolute = true; 997 new_inst.Src[0].Register.Negate = false; 998 tctx->emit_instruction(tctx, &new_inst); 999 } 1000 1001 new_inst = tgsi_default_full_instruction(); 1002 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1003 new_inst.Instruction.NumDstRegs = 1; 1004 reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 1005 new_inst.Instruction.NumSrcRegs = 3; 1006 reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1007 reg_src(&new_inst.Src[1], 
&ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1008 new_inst.Src[1].Register.Negate = true; 1009 reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1010 tctx->emit_instruction(tctx, &new_inst); 1011 } 1012 } 1013 1014 /* Inserts a MOV_SAT for the needed components of tex coord. Note that 1015 * in the case of TXP, the clamping must happen *after* projection, so 1016 * we need to lower TXP to TEX. 1017 * 1018 * MOV tmpA, src0 1019 * if (opc == TXP) { 1020 * ; do perspective division manually before clamping: 1021 * RCP tmpB, tmpA.w 1022 * MUL tmpB.<pmask>, tmpA, tmpB.xxxx 1023 * opc = TEX; 1024 * } 1025 * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords 1026 * <opc> dst, tmpA, ... 1027 */ 1028 #define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1)) 1029 #define SAMP_TMP 2 1030 static int 1031 transform_samp(struct tgsi_transform_context *tctx, 1032 struct tgsi_full_instruction *inst) 1033 { 1034 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1035 struct tgsi_full_src_register *coord = &inst->Src[0]; 1036 struct tgsi_full_src_register *samp; 1037 struct tgsi_full_instruction new_inst; 1038 /* mask is clamped coords, pmask is all coords (for projection): */ 1039 unsigned mask = 0, pmask = 0, smask; 1040 unsigned tex = inst->Texture.Texture; 1041 unsigned opcode = inst->Instruction.Opcode; 1042 bool lower_txp = (opcode == TGSI_OPCODE_TXP) && 1043 (ctx->config->lower_TXP & (1 << tex)); 1044 1045 if (opcode == TGSI_OPCODE_TXB2) { 1046 samp = &inst->Src[2]; 1047 } else { 1048 samp = &inst->Src[1]; 1049 } 1050 1051 /* convert sampler # to bitmask to test: */ 1052 smask = 1 << samp->Register.Index; 1053 1054 /* check if we actually need to lower this one: */ 1055 if (!(ctx->saturate & smask) && !lower_txp) 1056 return -1; 1057 1058 /* figure out which coordinates need saturating: 1059 * - RECT textures should not get saturated 1060 * - array index coords should not get saturated 1061 */ 1062 switch (tex) { 1063 case TGSI_TEXTURE_3D: 
1064 case TGSI_TEXTURE_CUBE: 1065 case TGSI_TEXTURE_CUBE_ARRAY: 1066 case TGSI_TEXTURE_SHADOWCUBE: 1067 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 1068 if (ctx->config->saturate_r & smask) 1069 mask |= TGSI_WRITEMASK_Z; 1070 pmask |= TGSI_WRITEMASK_Z; 1071 /* fallthrough */ 1072 1073 case TGSI_TEXTURE_2D: 1074 case TGSI_TEXTURE_2D_ARRAY: 1075 case TGSI_TEXTURE_SHADOW2D: 1076 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1077 case TGSI_TEXTURE_2D_MSAA: 1078 case TGSI_TEXTURE_2D_ARRAY_MSAA: 1079 if (ctx->config->saturate_t & smask) 1080 mask |= TGSI_WRITEMASK_Y; 1081 pmask |= TGSI_WRITEMASK_Y; 1082 /* fallthrough */ 1083 1084 case TGSI_TEXTURE_1D: 1085 case TGSI_TEXTURE_1D_ARRAY: 1086 case TGSI_TEXTURE_SHADOW1D: 1087 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1088 if (ctx->config->saturate_s & smask) 1089 mask |= TGSI_WRITEMASK_X; 1090 pmask |= TGSI_WRITEMASK_X; 1091 break; 1092 1093 case TGSI_TEXTURE_RECT: 1094 case TGSI_TEXTURE_SHADOWRECT: 1095 /* we don't saturate, but in case of lower_txp we 1096 * still need to do the perspective divide: 1097 */ 1098 pmask = TGSI_WRITEMASK_XY; 1099 break; 1100 } 1101 1102 /* sanity check.. driver could be asking to saturate a non- 1103 * existent coordinate component: 1104 */ 1105 if (!mask && !lower_txp) 1106 return -1; 1107 1108 /* MOV tmpA, src0 */ 1109 create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0); 1110 1111 /* This is a bit sad.. we need to clamp *after* the coords 1112 * are projected, which means lowering TXP to TEX and doing 1113 * the projection ourself. 
But since I haven't figured out 1114 * how to make the lowering code deliver an electric shock 1115 * to anyone using GL_CLAMP, we must do this instead: 1116 */ 1117 if (opcode == TGSI_OPCODE_TXP) { 1118 /* RCP tmpB.x tmpA.w */ 1119 new_inst = tgsi_default_full_instruction(); 1120 new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 1121 new_inst.Instruction.NumDstRegs = 1; 1122 reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 1123 new_inst.Instruction.NumSrcRegs = 1; 1124 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _)); 1125 tctx->emit_instruction(tctx, &new_inst); 1126 1127 /* MUL tmpA.mask, tmpA, tmpB.xxxx */ 1128 new_inst = tgsi_default_full_instruction(); 1129 new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 1130 new_inst.Instruction.NumDstRegs = 1; 1131 reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask); 1132 new_inst.Instruction.NumSrcRegs = 2; 1133 reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1134 reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X)); 1135 tctx->emit_instruction(tctx, &new_inst); 1136 1137 opcode = TGSI_OPCODE_TEX; 1138 } 1139 1140 /* MOV_SAT tmpA.<mask>, tmpA */ 1141 if (mask) { 1142 create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1); 1143 } 1144 1145 /* modify the texture samp instruction to take fixed up coord: */ 1146 new_inst = *inst; 1147 new_inst.Instruction.Opcode = opcode; 1148 new_inst.Src[0] = ctx->tmp[A].src; 1149 tctx->emit_instruction(tctx, &new_inst); 1150 1151 return 0; 1152 } 1153 1154 /* Two-sided color emulation: 1155 * For each COLOR input, create a corresponding BCOLOR input, plus 1156 * CMP instruction to select front or back color based on FACE 1157 */ 1158 #define TWOSIDE_GROW(n) ( \ 1159 2 + /* FACE */ \ 1160 ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\ 1161 ((n) * 1) + /* TEMP[] */ \ 1162 ((n) * NINST(3)) /* CMP instr */ \ 1163 ) 1164 1165 static void 1166 emit_twoside(struct tgsi_transform_context *tctx) 1167 { 1168 struct tgsi_lowering_context *ctx = 
tgsi_lowering_context(tctx); 1169 struct tgsi_shader_info *info = ctx->info; 1170 struct tgsi_full_declaration decl; 1171 struct tgsi_full_instruction new_inst; 1172 unsigned inbase, tmpbase; 1173 int i; 1174 1175 inbase = info->file_max[TGSI_FILE_INPUT] + 1; 1176 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1177 1178 /* additional inputs for BCOLOR's */ 1179 for (i = 0; i < ctx->two_side_colors; i++) { 1180 unsigned in_idx = ctx->two_side_idx[i]; 1181 decl = tgsi_default_full_declaration(); 1182 decl.Declaration.File = TGSI_FILE_INPUT; 1183 decl.Declaration.Semantic = true; 1184 decl.Range.First = decl.Range.Last = inbase + i; 1185 decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR; 1186 decl.Semantic.Index = info->input_semantic_index[in_idx]; 1187 decl.Declaration.Interpolate = true; 1188 decl.Interp.Interpolate = info->input_interpolate[in_idx]; 1189 decl.Interp.Location = info->input_interpolate_loc[in_idx]; 1190 decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx]; 1191 tctx->emit_declaration(tctx, &decl); 1192 } 1193 1194 /* additional input for FACE */ 1195 if (ctx->two_side_colors && (ctx->face_idx == -1)) { 1196 decl = tgsi_default_full_declaration(); 1197 decl.Declaration.File = TGSI_FILE_INPUT; 1198 decl.Declaration.Semantic = true; 1199 decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors; 1200 decl.Semantic.Name = TGSI_SEMANTIC_FACE; 1201 decl.Semantic.Index = 0; 1202 tctx->emit_declaration(tctx, &decl); 1203 1204 ctx->face_idx = decl.Range.First; 1205 } 1206 1207 /* additional temps for COLOR/BCOLOR selection: */ 1208 for (i = 0; i < ctx->two_side_colors; i++) { 1209 decl = tgsi_default_full_declaration(); 1210 decl.Declaration.File = TGSI_FILE_TEMPORARY; 1211 decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i; 1212 tctx->emit_declaration(tctx, &decl); 1213 } 1214 1215 /* and finally additional instructions to select COLOR/BCOLOR: */ 1216 for (i = 0; i < ctx->two_side_colors; i++) { 1217 new_inst = 
tgsi_default_full_instruction(); 1218 new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1219 1220 new_inst.Instruction.NumDstRegs = 1; 1221 new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 1222 new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i; 1223 new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 1224 1225 new_inst.Instruction.NumSrcRegs = 3; 1226 new_inst.Src[0].Register.File = TGSI_FILE_INPUT; 1227 new_inst.Src[0].Register.Index = ctx->face_idx; 1228 new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; 1229 new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; 1230 new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; 1231 new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; 1232 new_inst.Src[1].Register.File = TGSI_FILE_INPUT; 1233 new_inst.Src[1].Register.Index = inbase + i; 1234 new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X; 1235 new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y; 1236 new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1237 new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; 1238 new_inst.Src[2].Register.File = TGSI_FILE_INPUT; 1239 new_inst.Src[2].Register.Index = ctx->two_side_idx[i]; 1240 new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X; 1241 new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y; 1242 new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1243 new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; 1244 1245 tctx->emit_instruction(tctx, &new_inst); 1246 } 1247 } 1248 1249 static void 1250 emit_decls(struct tgsi_transform_context *tctx) 1251 { 1252 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1253 struct tgsi_shader_info *info = ctx->info; 1254 struct tgsi_full_declaration decl; 1255 struct tgsi_full_immediate immed; 1256 unsigned tmpbase; 1257 int i; 1258 1259 tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1260 1261 ctx->color_base = tmpbase + ctx->numtmp; 1262 1263 /* declare immediate: */ 1264 immed = tgsi_default_full_immediate(); 1265 immed.Immediate.NrTokens = 1 + 4; /* one for the 
token itself */ 1266 immed.u[0].Float = 0.0; 1267 immed.u[1].Float = 1.0; 1268 immed.u[2].Float = 128.0; 1269 immed.u[3].Float = 0.0; 1270 tctx->emit_immediate(tctx, &immed); 1271 1272 ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; 1273 ctx->imm.Register.Index = info->immediate_count; 1274 ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; 1275 ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; 1276 ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1277 ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; 1278 1279 /* declare temp regs: */ 1280 for (i = 0; i < ctx->numtmp; i++) { 1281 decl = tgsi_default_full_declaration(); 1282 decl.Declaration.File = TGSI_FILE_TEMPORARY; 1283 decl.Range.First = decl.Range.Last = tmpbase + i; 1284 tctx->emit_declaration(tctx, &decl); 1285 1286 ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; 1287 ctx->tmp[i].src.Register.Index = tmpbase + i; 1288 ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; 1289 ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; 1290 ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1291 ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; 1292 1293 ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; 1294 ctx->tmp[i].dst.Register.Index = tmpbase + i; 1295 ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1296 } 1297 1298 if (ctx->two_side_colors) 1299 emit_twoside(tctx); 1300 } 1301 1302 static void 1303 rename_color_inputs(struct tgsi_lowering_context *ctx, 1304 struct tgsi_full_instruction *inst) 1305 { 1306 unsigned i, j; 1307 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1308 struct tgsi_src_register *src = &inst->Src[i].Register; 1309 if (src->File == TGSI_FILE_INPUT) { 1310 for (j = 0; j < ctx->two_side_colors; j++) { 1311 if (src->Index == ctx->two_side_idx[j]) { 1312 src->File = TGSI_FILE_TEMPORARY; 1313 src->Index = ctx->color_base + j; 1314 break; 1315 } 1316 } 1317 } 1318 } 1319 1320 } 1321 1322 static void 1323 transform_instr(struct tgsi_transform_context *tctx, 1324 struct tgsi_full_instruction 
*inst) 1325 { 1326 struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1327 1328 if (!ctx->emitted_decls) { 1329 emit_decls(tctx); 1330 ctx->emitted_decls = 1; 1331 } 1332 1333 /* if emulating two-sided-color, we need to re-write some 1334 * src registers: 1335 */ 1336 if (ctx->two_side_colors) 1337 rename_color_inputs(ctx, inst); 1338 1339 switch (inst->Instruction.Opcode) { 1340 case TGSI_OPCODE_DST: 1341 if (!ctx->config->lower_DST) 1342 goto skip; 1343 transform_dst(tctx, inst); 1344 break; 1345 case TGSI_OPCODE_LRP: 1346 if (!ctx->config->lower_LRP) 1347 goto skip; 1348 transform_lrp(tctx, inst); 1349 break; 1350 case TGSI_OPCODE_FRC: 1351 if (!ctx->config->lower_FRC) 1352 goto skip; 1353 transform_frc(tctx, inst); 1354 break; 1355 case TGSI_OPCODE_POW: 1356 if (!ctx->config->lower_POW) 1357 goto skip; 1358 transform_pow(tctx, inst); 1359 break; 1360 case TGSI_OPCODE_LIT: 1361 if (!ctx->config->lower_LIT) 1362 goto skip; 1363 transform_lit(tctx, inst); 1364 break; 1365 case TGSI_OPCODE_EXP: 1366 if (!ctx->config->lower_EXP) 1367 goto skip; 1368 transform_exp(tctx, inst); 1369 break; 1370 case TGSI_OPCODE_LOG: 1371 if (!ctx->config->lower_LOG) 1372 goto skip; 1373 transform_log(tctx, inst); 1374 break; 1375 case TGSI_OPCODE_DP4: 1376 if (!ctx->config->lower_DP4) 1377 goto skip; 1378 transform_dotp(tctx, inst); 1379 break; 1380 case TGSI_OPCODE_DP3: 1381 if (!ctx->config->lower_DP3) 1382 goto skip; 1383 transform_dotp(tctx, inst); 1384 break; 1385 case TGSI_OPCODE_DP2: 1386 if (!ctx->config->lower_DP2) 1387 goto skip; 1388 transform_dotp(tctx, inst); 1389 break; 1390 case TGSI_OPCODE_FLR: 1391 if (!ctx->config->lower_FLR) 1392 goto skip; 1393 transform_flr_ceil(tctx, inst); 1394 break; 1395 case TGSI_OPCODE_CEIL: 1396 if (!ctx->config->lower_CEIL) 1397 goto skip; 1398 transform_flr_ceil(tctx, inst); 1399 break; 1400 case TGSI_OPCODE_TRUNC: 1401 if (!ctx->config->lower_TRUNC) 1402 goto skip; 1403 transform_trunc(tctx, inst); 1404 break; 1405 case 
TGSI_OPCODE_TEX: 1406 case TGSI_OPCODE_TXP: 1407 case TGSI_OPCODE_TXB: 1408 case TGSI_OPCODE_TXB2: 1409 case TGSI_OPCODE_TXL: 1410 if (transform_samp(tctx, inst)) 1411 goto skip; 1412 break; 1413 default: 1414 skip: 1415 tctx->emit_instruction(tctx, inst); 1416 break; 1417 } 1418 } 1419 1420 /* returns NULL if no lowering required, else returns the new 1421 * tokens (which caller is required to free()). In either case 1422 * returns the current info. 1423 */ 1424 const struct tgsi_token * 1425 tgsi_transform_lowering(const struct tgsi_lowering_config *config, 1426 const struct tgsi_token *tokens, 1427 struct tgsi_shader_info *info) 1428 { 1429 struct tgsi_lowering_context ctx; 1430 struct tgsi_token *newtoks; 1431 int newlen, numtmp; 1432 1433 /* sanity check in case limit is ever increased: */ 1434 STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); 1435 1436 /* sanity check the lowering */ 1437 assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); 1438 assert(!(config->lower_FRC && config->lower_TRUNC)); 1439 1440 memset(&ctx, 0, sizeof(ctx)); 1441 ctx.base.transform_instruction = transform_instr; 1442 ctx.info = info; 1443 ctx.config = config; 1444 1445 tgsi_scan_shader(tokens, info); 1446 1447 /* if we are adding fragment shader support to emulate two-sided 1448 * color, then figure out the number of additional inputs we need 1449 * to create for BCOLOR's.. 1450 */ 1451 if ((info->processor == PIPE_SHADER_FRAGMENT) && 1452 config->color_two_side) { 1453 int i; 1454 ctx.face_idx = -1; 1455 for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) { 1456 if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR) 1457 ctx.two_side_idx[ctx.two_side_colors++] = i; 1458 if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE) 1459 ctx.face_idx = i; 1460 } 1461 } 1462 1463 ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t; 1464 1465 #define OPCS(x) ((config->lower_ ## x) ? 
info->opcode_count[TGSI_OPCODE_ ## x] : 0) 1466 /* if there are no instructions to lower, then we are done: */ 1467 if (!(OPCS(DST) || 1468 OPCS(LRP) || 1469 OPCS(FRC) || 1470 OPCS(POW) || 1471 OPCS(LIT) || 1472 OPCS(EXP) || 1473 OPCS(LOG) || 1474 OPCS(DP4) || 1475 OPCS(DP3) || 1476 OPCS(DP2) || 1477 OPCS(FLR) || 1478 OPCS(CEIL) || 1479 OPCS(TRUNC) || 1480 OPCS(TXP) || 1481 ctx.two_side_colors || 1482 ctx.saturate)) 1483 return NULL; 1484 1485 #if 0 /* debug */ 1486 _debug_printf("BEFORE:"); 1487 tgsi_dump(tokens, 0); 1488 #endif 1489 1490 numtmp = 0; 1491 newlen = tgsi_num_tokens(tokens); 1492 if (OPCS(DST)) { 1493 newlen += DST_GROW * OPCS(DST); 1494 numtmp = MAX2(numtmp, DST_TMP); 1495 } 1496 if (OPCS(LRP)) { 1497 newlen += LRP_GROW * OPCS(LRP); 1498 numtmp = MAX2(numtmp, LRP_TMP); 1499 } 1500 if (OPCS(FRC)) { 1501 newlen += FRC_GROW * OPCS(FRC); 1502 numtmp = MAX2(numtmp, FRC_TMP); 1503 } 1504 if (OPCS(POW)) { 1505 newlen += POW_GROW * OPCS(POW); 1506 numtmp = MAX2(numtmp, POW_TMP); 1507 } 1508 if (OPCS(LIT)) { 1509 newlen += LIT_GROW * OPCS(LIT); 1510 numtmp = MAX2(numtmp, LIT_TMP); 1511 } 1512 if (OPCS(EXP)) { 1513 newlen += EXP_GROW * OPCS(EXP); 1514 numtmp = MAX2(numtmp, EXP_TMP); 1515 } 1516 if (OPCS(LOG)) { 1517 newlen += LOG_GROW * OPCS(LOG); 1518 numtmp = MAX2(numtmp, LOG_TMP); 1519 } 1520 if (OPCS(DP4)) { 1521 newlen += DP4_GROW * OPCS(DP4); 1522 numtmp = MAX2(numtmp, DOTP_TMP); 1523 } 1524 if (OPCS(DP3)) { 1525 newlen += DP3_GROW * OPCS(DP3); 1526 numtmp = MAX2(numtmp, DOTP_TMP); 1527 } 1528 if (OPCS(DP2)) { 1529 newlen += DP2_GROW * OPCS(DP2); 1530 numtmp = MAX2(numtmp, DOTP_TMP); 1531 } 1532 if (OPCS(FLR)) { 1533 newlen += FLR_GROW * OPCS(FLR); 1534 numtmp = MAX2(numtmp, FLR_TMP); 1535 } 1536 if (OPCS(CEIL)) { 1537 newlen += CEIL_GROW * OPCS(CEIL); 1538 numtmp = MAX2(numtmp, CEIL_TMP); 1539 } 1540 if (OPCS(TRUNC)) { 1541 newlen += TRUNC_GROW * OPCS(TRUNC); 1542 numtmp = MAX2(numtmp, TRUNC_TMP); 1543 } 1544 if (ctx.saturate || config->lower_TXP) { 
1545 int n = 0; 1546 1547 if (ctx.saturate) { 1548 n = info->opcode_count[TGSI_OPCODE_TEX] + 1549 info->opcode_count[TGSI_OPCODE_TXP] + 1550 info->opcode_count[TGSI_OPCODE_TXB] + 1551 info->opcode_count[TGSI_OPCODE_TXB2] + 1552 info->opcode_count[TGSI_OPCODE_TXL]; 1553 } else if (config->lower_TXP) { 1554 n = info->opcode_count[TGSI_OPCODE_TXP]; 1555 } 1556 1557 newlen += SAMP_GROW * n; 1558 numtmp = MAX2(numtmp, SAMP_TMP); 1559 } 1560 1561 /* specifically don't include two_side_colors temps in the count: */ 1562 ctx.numtmp = numtmp; 1563 1564 if (ctx.two_side_colors) { 1565 newlen += TWOSIDE_GROW(ctx.two_side_colors); 1566 /* note: we permanently consume temp regs, re-writing references 1567 * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP 1568 * instruction that selects which varying to use): 1569 */ 1570 numtmp += ctx.two_side_colors; 1571 } 1572 1573 newlen += 2 * numtmp; 1574 newlen += 5; /* immediate */ 1575 1576 newtoks = tgsi_alloc_tokens(newlen); 1577 if (!newtoks) 1578 return NULL; 1579 1580 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base); 1581 1582 tgsi_scan_shader(newtoks, info); 1583 1584 #if 0 /* debug */ 1585 _debug_printf("AFTER:"); 1586 tgsi_dump(newtoks, 0); 1587 #endif 1588 1589 return newtoks; 1590 } 1591