1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23 extern "C" { 24 #include "tgsi/tgsi_dump.h" 25 #include "tgsi/tgsi_scan.h" 26 } 27 28 #include "nv50_ir.h" 29 #include "nv50_ir_util.h" 30 #include "nv50_ir_build_util.h" 31 32 namespace tgsi { 33 34 class Source; 35 36 static nv50_ir::operation translateOpcode(uint opcode); 37 static nv50_ir::DataFile translateFile(uint file); 38 static nv50_ir::TexTarget translateTexture(uint texTarg); 39 static nv50_ir::SVSemantic translateSysVal(uint sysval); 40 41 class Instruction 42 { 43 public: 44 Instruction(const struct tgsi_full_instruction *inst) : insn(inst) { } 45 46 class SrcRegister 47 { 48 public: 49 SrcRegister(const struct tgsi_full_src_register *src) 50 : reg(src->Register), 51 fsr(src) 52 { } 53 54 SrcRegister(const struct tgsi_src_register& src) : reg(src), fsr(NULL) { } 55 56 struct tgsi_src_register offsetToSrc(struct tgsi_texture_offset off) 57 { 58 struct tgsi_src_register reg; 59 memset(®, 0, sizeof(reg)); 60 reg.Index = off.Index; 61 reg.File = off.File; 62 reg.SwizzleX = off.SwizzleX; 63 reg.SwizzleY = off.SwizzleY; 64 reg.SwizzleZ = off.SwizzleZ; 65 return reg; 66 } 67 68 SrcRegister(const struct tgsi_texture_offset& off) : 69 reg(offsetToSrc(off)), 70 fsr(NULL) 71 { } 72 73 uint getFile() const { return reg.File; } 74 75 bool is2D() const { return reg.Dimension; } 76 77 bool isIndirect(int dim) const 78 { 79 return (dim && fsr) ? fsr->Dimension.Indirect : reg.Indirect; 80 } 81 82 int getIndex(int dim) const 83 { 84 return (dim && fsr) ? fsr->Dimension.Index : reg.Index; 85 } 86 87 int getSwizzle(int chan) const 88 { 89 return tgsi_util_get_src_register_swizzle(®, chan); 90 } 91 92 nv50_ir::Modifier getMod(int chan) const; 93 94 SrcRegister getIndirect(int dim) const 95 { 96 assert(fsr && isIndirect(dim)); 97 if (dim) 98 return SrcRegister(fsr->DimIndirect); 99 return SrcRegister(fsr->Indirect); 100 } 101 102 uint32_t getValueU32(int c, const struct nv50_ir_prog_info *info) const 103 { 104 assert(reg.File == TGSI_FILE_IMMEDIATE); 105 assert(!reg.Absolute); 106 assert(!reg.Negate); 107 return info->immd.data[reg.Index * 4 + getSwizzle(c)]; 108 } 109 110 private: 111 const struct tgsi_src_register reg; 112 const struct tgsi_full_src_register *fsr; 113 }; 114 115 class DstRegister 116 { 117 public: 118 DstRegister(const struct tgsi_full_dst_register *dst) 119 : reg(dst->Register), 120 fdr(dst) 121 { } 122 123 DstRegister(const struct tgsi_dst_register& dst) : reg(dst), fdr(NULL) { } 124 125 uint getFile() const { return reg.File; } 126 127 bool is2D() const { return reg.Dimension; } 128 129 bool isIndirect(int dim) const 130 { 131 return (dim && fdr) ? fdr->Dimension.Indirect : reg.Indirect; 132 } 133 134 int getIndex(int dim) const 135 { 136 return (dim && fdr) ? fdr->Dimension.Dimension : reg.Index; 137 } 138 139 unsigned int getMask() const { return reg.WriteMask; } 140 141 bool isMasked(int chan) const { return !(getMask() & (1 << chan)); } 142 143 SrcRegister getIndirect(int dim) const 144 { 145 assert(fdr && isIndirect(dim)); 146 if (dim) 147 return SrcRegister(fdr->DimIndirect); 148 return SrcRegister(fdr->Indirect); 149 } 150 151 private: 152 const struct tgsi_dst_register reg; 153 const struct tgsi_full_dst_register *fdr; 154 }; 155 156 inline uint getOpcode() const { return insn->Instruction.Opcode; } 157 158 unsigned int srcCount() const { return insn->Instruction.NumSrcRegs; } 159 unsigned int dstCount() const { return insn->Instruction.NumDstRegs; } 160 161 // mask of used components of source s 162 unsigned int srcMask(unsigned int s) const; 163 164 SrcRegister getSrc(unsigned int s) const 165 { 166 assert(s < srcCount()); 167 return SrcRegister(&insn->Src[s]); 168 } 169 170 DstRegister getDst(unsigned int d) const 171 { 172 assert(d < dstCount()); 173 return DstRegister(&insn->Dst[d]); 174 } 175 176 SrcRegister getTexOffset(unsigned int i) const 177 { 178 assert(i < TGSI_FULL_MAX_TEX_OFFSETS); 179 return SrcRegister(insn->TexOffsets[i]); 180 } 181 182 unsigned int getNumTexOffsets() const { return insn->Texture.NumOffsets; } 183 184 bool checkDstSrcAliasing() const; 185 186 inline nv50_ir::operation getOP() const { 187 return translateOpcode(getOpcode()); } 188 189 nv50_ir::DataType inferSrcType() const; 190 nv50_ir::DataType inferDstType() const; 191 192 nv50_ir::CondCode getSetCond() const; 193 194 nv50_ir::TexInstruction::Target getTexture(const Source *, int s) const; 195 196 inline uint getLabel() { return insn->Label.Label; } 197 198 unsigned getSaturate() const { return insn->Instruction.Saturate; } 199 200 void print() const 201 { 202 tgsi_dump_instruction(insn, 1); 203 } 204 205 private: 206 const struct tgsi_full_instruction *insn; 207 }; 208 209 unsigned int Instruction::srcMask(unsigned int s) const 210 { 211 unsigned int mask = insn->Dst[0].Register.WriteMask; 212 213 switch (insn->Instruction.Opcode) { 214 case TGSI_OPCODE_COS: 215 case TGSI_OPCODE_SIN: 216 return (mask & 0x8) | ((mask & 0x7) ? 0x1 : 0x0); 217 case TGSI_OPCODE_DP2: 218 return 0x3; 219 case TGSI_OPCODE_DP3: 220 return 0x7; 221 case TGSI_OPCODE_DP4: 222 case TGSI_OPCODE_DPH: 223 case TGSI_OPCODE_KIL: /* WriteMask ignored */ 224 return 0xf; 225 case TGSI_OPCODE_DST: 226 return mask & (s ? 0xa : 0x6); 227 case TGSI_OPCODE_EX2: 228 case TGSI_OPCODE_EXP: 229 case TGSI_OPCODE_LG2: 230 case TGSI_OPCODE_LOG: 231 case TGSI_OPCODE_POW: 232 case TGSI_OPCODE_RCP: 233 case TGSI_OPCODE_RSQ: 234 case TGSI_OPCODE_SCS: 235 return 0x1; 236 case TGSI_OPCODE_IF: 237 return 0x1; 238 case TGSI_OPCODE_LIT: 239 return 0xb; 240 case TGSI_OPCODE_TEX: 241 case TGSI_OPCODE_TXB: 242 case TGSI_OPCODE_TXD: 243 case TGSI_OPCODE_TXL: 244 case TGSI_OPCODE_TXP: 245 { 246 const struct tgsi_instruction_texture *tex = &insn->Texture; 247 248 assert(insn->Instruction.Texture); 249 250 mask = 0x7; 251 if (insn->Instruction.Opcode != TGSI_OPCODE_TEX && 252 insn->Instruction.Opcode != TGSI_OPCODE_TXD) 253 mask |= 0x8; /* bias, lod or proj */ 254 255 switch (tex->Texture) { 256 case TGSI_TEXTURE_1D: 257 mask &= 0x9; 258 break; 259 case TGSI_TEXTURE_SHADOW1D: 260 mask &= 0xd; 261 break; 262 case TGSI_TEXTURE_1D_ARRAY: 263 case TGSI_TEXTURE_2D: 264 case TGSI_TEXTURE_RECT: 265 mask &= 0xb; 266 break; 267 default: 268 break; 269 } 270 } 271 return mask; 272 case TGSI_OPCODE_XPD: 273 { 274 unsigned int x = 0; 275 if (mask & 1) x |= 0x6; 276 if (mask & 2) x |= 0x5; 277 if (mask & 4) x |= 0x3; 278 return x; 279 } 280 default: 281 break; 282 } 283 284 return mask; 285 } 286 287 nv50_ir::Modifier Instruction::SrcRegister::getMod(int chan) const 288 { 289 nv50_ir::Modifier m(0); 290 291 if (reg.Absolute) 292 m = m | nv50_ir::Modifier(NV50_IR_MOD_ABS); 293 if (reg.Negate) 294 m = m | nv50_ir::Modifier(NV50_IR_MOD_NEG); 295 return m; 296 } 297 298 static nv50_ir::DataFile translateFile(uint file) 299 { 300 switch (file) { 301 case TGSI_FILE_CONSTANT: return nv50_ir::FILE_MEMORY_CONST; 302 case TGSI_FILE_INPUT: return nv50_ir::FILE_SHADER_INPUT; 303 case TGSI_FILE_OUTPUT: return nv50_ir::FILE_SHADER_OUTPUT; 304 case TGSI_FILE_TEMPORARY: return nv50_ir::FILE_GPR; 305 case TGSI_FILE_ADDRESS: return nv50_ir::FILE_ADDRESS; 306 case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE; 307 case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE; 308 case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE; 309 case TGSI_FILE_IMMEDIATE_ARRAY: return nv50_ir::FILE_IMMEDIATE; 310 case TGSI_FILE_TEMPORARY_ARRAY: return nv50_ir::FILE_MEMORY_LOCAL; 311 case TGSI_FILE_RESOURCE: return nv50_ir::FILE_MEMORY_GLOBAL; 312 case TGSI_FILE_SAMPLER: 313 case TGSI_FILE_NULL: 314 default: 315 return nv50_ir::FILE_NULL; 316 } 317 } 318 319 static nv50_ir::SVSemantic translateSysVal(uint sysval) 320 { 321 switch (sysval) { 322 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE; 323 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE; 324 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID; 325 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID; 326 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID; 327 default: 328 assert(0); 329 return nv50_ir::SV_CLOCK; 330 } 331 } 332 333 #define NV50_IR_TEX_TARG_CASE(a, b) \ 334 case TGSI_TEXTURE_##a: return nv50_ir::TEX_TARGET_##b; 335 336 static nv50_ir::TexTarget translateTexture(uint tex) 337 { 338 switch (tex) { 339 NV50_IR_TEX_TARG_CASE(1D, 1D); 340 NV50_IR_TEX_TARG_CASE(2D, 2D); 341 NV50_IR_TEX_TARG_CASE(3D, 3D); 342 NV50_IR_TEX_TARG_CASE(CUBE, CUBE); 343 NV50_IR_TEX_TARG_CASE(RECT, RECT); 344 NV50_IR_TEX_TARG_CASE(1D_ARRAY, 1D_ARRAY); 345 NV50_IR_TEX_TARG_CASE(2D_ARRAY, 2D_ARRAY); 346 NV50_IR_TEX_TARG_CASE(SHADOW1D, 1D_SHADOW); 347 NV50_IR_TEX_TARG_CASE(SHADOW2D, 2D_SHADOW); 348 NV50_IR_TEX_TARG_CASE(SHADOW1D_ARRAY, 1D_ARRAY_SHADOW); 349 NV50_IR_TEX_TARG_CASE(SHADOW2D_ARRAY, 2D_ARRAY_SHADOW); 350 NV50_IR_TEX_TARG_CASE(SHADOWCUBE, CUBE_SHADOW); 351 NV50_IR_TEX_TARG_CASE(SHADOWRECT, RECT_SHADOW); 352 NV50_IR_TEX_TARG_CASE(BUFFER, BUFFER); 353 354 case TGSI_TEXTURE_UNKNOWN: 355 default: 356 assert(!"invalid texture target"); 357 return nv50_ir::TEX_TARGET_2D; 358 } 359 } 360 361 nv50_ir::DataType Instruction::inferSrcType() const 362 { 363 switch (getOpcode()) { 364 case TGSI_OPCODE_AND: 365 case TGSI_OPCODE_OR: 366 case TGSI_OPCODE_XOR: 367 case TGSI_OPCODE_NOT: 368 case TGSI_OPCODE_U2F: 369 case TGSI_OPCODE_UADD: 370 case TGSI_OPCODE_UDIV: 371 case TGSI_OPCODE_UMOD: 372 case TGSI_OPCODE_UMAD: 373 case TGSI_OPCODE_UMUL: 374 case TGSI_OPCODE_UMAX: 375 case TGSI_OPCODE_UMIN: 376 case TGSI_OPCODE_USEQ: 377 case TGSI_OPCODE_USGE: 378 case TGSI_OPCODE_USLT: 379 case TGSI_OPCODE_USNE: 380 case TGSI_OPCODE_USHR: 381 case TGSI_OPCODE_UCMP: 382 return nv50_ir::TYPE_U32; 383 case TGSI_OPCODE_I2F: 384 case TGSI_OPCODE_IDIV: 385 case TGSI_OPCODE_IMAX: 386 case TGSI_OPCODE_IMIN: 387 case TGSI_OPCODE_IABS: 388 case TGSI_OPCODE_INEG: 389 case TGSI_OPCODE_ISGE: 390 case TGSI_OPCODE_ISHR: 391 case TGSI_OPCODE_ISLT: 392 case TGSI_OPCODE_ISSG: 393 case TGSI_OPCODE_SAD: // not sure about SAD, but no one has a float version 394 case TGSI_OPCODE_MOD: 395 case TGSI_OPCODE_UARL: 396 return nv50_ir::TYPE_S32; 397 default: 398 return nv50_ir::TYPE_F32; 399 } 400 } 401 402 nv50_ir::DataType Instruction::inferDstType() const 403 { 404 switch (getOpcode()) { 405 case TGSI_OPCODE_F2U: return nv50_ir::TYPE_U32; 406 case TGSI_OPCODE_F2I: return nv50_ir::TYPE_S32; 407 case TGSI_OPCODE_I2F: 408 case TGSI_OPCODE_U2F: 409 return nv50_ir::TYPE_F32; 410 default: 411 return inferSrcType(); 412 } 413 } 414 415 nv50_ir::CondCode Instruction::getSetCond() const 416 { 417 using namespace nv50_ir; 418 419 switch (getOpcode()) { 420 case TGSI_OPCODE_SLT: 421 case TGSI_OPCODE_ISLT: 422 case TGSI_OPCODE_USLT: 423 return CC_LT; 424 case TGSI_OPCODE_SLE: 425 return CC_LE; 426 case TGSI_OPCODE_SGE: 427 case TGSI_OPCODE_ISGE: 428 case TGSI_OPCODE_USGE: 429 return CC_GE; 430 case TGSI_OPCODE_SGT: 431 return CC_GT; 432 case TGSI_OPCODE_SEQ: 433 case TGSI_OPCODE_USEQ: 434 return CC_EQ; 435 case TGSI_OPCODE_SNE: 436 return CC_NEU; 437 case TGSI_OPCODE_USNE: 438 return CC_NE; 439 case TGSI_OPCODE_SFL: 440 return CC_NEVER; 441 case TGSI_OPCODE_STR: 442 default: 443 return CC_ALWAYS; 444 } 445 } 446 447 #define NV50_IR_OPCODE_CASE(a, b) case TGSI_OPCODE_##a: return nv50_ir::OP_##b 448 449 static nv50_ir::operation translateOpcode(uint opcode) 450 { 451 switch (opcode) { 452 NV50_IR_OPCODE_CASE(ARL, SHL); 453 NV50_IR_OPCODE_CASE(MOV, MOV); 454 455 NV50_IR_OPCODE_CASE(RCP, RCP); 456 NV50_IR_OPCODE_CASE(RSQ, RSQ); 457 458 NV50_IR_OPCODE_CASE(MUL, MUL); 459 NV50_IR_OPCODE_CASE(ADD, ADD); 460 461 NV50_IR_OPCODE_CASE(MIN, MIN); 462 NV50_IR_OPCODE_CASE(MAX, MAX); 463 NV50_IR_OPCODE_CASE(SLT, SET); 464 NV50_IR_OPCODE_CASE(SGE, SET); 465 NV50_IR_OPCODE_CASE(MAD, MAD); 466 NV50_IR_OPCODE_CASE(SUB, SUB); 467 468 NV50_IR_OPCODE_CASE(FLR, FLOOR); 469 NV50_IR_OPCODE_CASE(ROUND, CVT); 470 NV50_IR_OPCODE_CASE(EX2, EX2); 471 NV50_IR_OPCODE_CASE(LG2, LG2); 472 NV50_IR_OPCODE_CASE(POW, POW); 473 474 NV50_IR_OPCODE_CASE(ABS, ABS); 475 476 NV50_IR_OPCODE_CASE(COS, COS); 477 NV50_IR_OPCODE_CASE(DDX, DFDX); 478 NV50_IR_OPCODE_CASE(DDY, DFDY); 479 NV50_IR_OPCODE_CASE(KILP, DISCARD); 480 481 NV50_IR_OPCODE_CASE(SEQ, SET); 482 NV50_IR_OPCODE_CASE(SFL, SET); 483 NV50_IR_OPCODE_CASE(SGT, SET); 484 NV50_IR_OPCODE_CASE(SIN, SIN); 485 NV50_IR_OPCODE_CASE(SLE, SET); 486 NV50_IR_OPCODE_CASE(SNE, SET); 487 NV50_IR_OPCODE_CASE(STR, SET); 488 NV50_IR_OPCODE_CASE(TEX, TEX); 489 NV50_IR_OPCODE_CASE(TXD, TXD); 490 NV50_IR_OPCODE_CASE(TXP, TEX); 491 492 NV50_IR_OPCODE_CASE(BRA, BRA); 493 NV50_IR_OPCODE_CASE(CAL, CALL); 494 NV50_IR_OPCODE_CASE(RET, RET); 495 NV50_IR_OPCODE_CASE(CMP, SLCT); 496 497 NV50_IR_OPCODE_CASE(TXB, TXB); 498 499 NV50_IR_OPCODE_CASE(DIV, DIV); 500 501 NV50_IR_OPCODE_CASE(TXL, TXL); 502 503 NV50_IR_OPCODE_CASE(CEIL, CEIL); 504 NV50_IR_OPCODE_CASE(I2F, CVT); 505 NV50_IR_OPCODE_CASE(NOT, NOT); 506 NV50_IR_OPCODE_CASE(TRUNC, TRUNC); 507 NV50_IR_OPCODE_CASE(SHL, SHL); 508 509 NV50_IR_OPCODE_CASE(AND, AND); 510 NV50_IR_OPCODE_CASE(OR, OR); 511 NV50_IR_OPCODE_CASE(MOD, MOD); 512 NV50_IR_OPCODE_CASE(XOR, XOR); 513 NV50_IR_OPCODE_CASE(SAD, SAD); 514 NV50_IR_OPCODE_CASE(TXF, TXF); 515 NV50_IR_OPCODE_CASE(TXQ, TXQ); 516 517 NV50_IR_OPCODE_CASE(EMIT, EMIT); 518 NV50_IR_OPCODE_CASE(ENDPRIM, RESTART); 519 520 NV50_IR_OPCODE_CASE(KIL, DISCARD); 521 522 NV50_IR_OPCODE_CASE(F2I, CVT); 523 NV50_IR_OPCODE_CASE(IDIV, DIV); 524 NV50_IR_OPCODE_CASE(IMAX, MAX); 525 NV50_IR_OPCODE_CASE(IMIN, MIN); 526 NV50_IR_OPCODE_CASE(IABS, ABS); 527 NV50_IR_OPCODE_CASE(INEG, NEG); 528 NV50_IR_OPCODE_CASE(ISGE, SET); 529 NV50_IR_OPCODE_CASE(ISHR, SHR); 530 NV50_IR_OPCODE_CASE(ISLT, SET); 531 NV50_IR_OPCODE_CASE(F2U, CVT); 532 NV50_IR_OPCODE_CASE(U2F, CVT); 533 NV50_IR_OPCODE_CASE(UADD, ADD); 534 NV50_IR_OPCODE_CASE(UDIV, DIV); 535 NV50_IR_OPCODE_CASE(UMAD, MAD); 536 NV50_IR_OPCODE_CASE(UMAX, MAX); 537 NV50_IR_OPCODE_CASE(UMIN, MIN); 538 NV50_IR_OPCODE_CASE(UMOD, MOD); 539 NV50_IR_OPCODE_CASE(UMUL, MUL); 540 NV50_IR_OPCODE_CASE(USEQ, SET); 541 NV50_IR_OPCODE_CASE(USGE, SET); 542 NV50_IR_OPCODE_CASE(USHR, SHR); 543 NV50_IR_OPCODE_CASE(USLT, SET); 544 NV50_IR_OPCODE_CASE(USNE, SET); 545 546 NV50_IR_OPCODE_CASE(LOAD, TXF); 547 NV50_IR_OPCODE_CASE(SAMPLE, TEX); 548 NV50_IR_OPCODE_CASE(SAMPLE_B, TXB); 549 NV50_IR_OPCODE_CASE(SAMPLE_C, TEX); 550 NV50_IR_OPCODE_CASE(SAMPLE_C_LZ, TEX); 551 NV50_IR_OPCODE_CASE(SAMPLE_D, TXD); 552 NV50_IR_OPCODE_CASE(SAMPLE_L, TXL); 553 NV50_IR_OPCODE_CASE(GATHER4, TXG); 554 NV50_IR_OPCODE_CASE(SVIEWINFO, TXQ); 555 556 NV50_IR_OPCODE_CASE(END, EXIT); 557 558 default: 559 return nv50_ir::OP_NOP; 560 } 561 } 562 563 bool Instruction::checkDstSrcAliasing() const 564 { 565 if (insn->Dst[0].Register.Indirect) // no danger if indirect, using memory 566 return false; 567 568 for (int s = 0; s < TGSI_FULL_MAX_SRC_REGISTERS; ++s) { 569 if (insn->Src[s].Register.File == TGSI_FILE_NULL) 570 break; 571 if (insn->Src[s].Register.File == insn->Dst[0].Register.File && 572 insn->Src[s].Register.Index == insn->Dst[0].Register.Index) 573 return true; 574 } 575 return false; 576 } 577 578 class Source 579 { 580 public: 581 Source(struct nv50_ir_prog_info *); 582 ~Source(); 583 584 public: 585 bool scanSource(); 586 unsigned fileSize(unsigned file) const { return scan.file_max[file] + 1; } 587 588 public: 589 struct tgsi_shader_info scan; 590 struct tgsi_full_instruction *insns; 591 const struct tgsi_token *tokens; 592 struct nv50_ir_prog_info *info; 593 594 nv50_ir::DynArray tempArrays; 595 nv50_ir::DynArray immdArrays; 596 int tempArrayCount; 597 int immdArrayCount; 598 599 bool mainTempsInLMem; 600 601 int clipVertexOutput; 602 603 uint8_t *samplerViewTargets; // TGSI_TEXTURE_* 604 unsigned samplerViewCount; 605 606 private: 607 int inferSysValDirection(unsigned sn) const; 608 bool scanDeclaration(const struct tgsi_full_declaration *); 609 bool scanInstruction(const struct tgsi_full_instruction *); 610 void scanProperty(const struct tgsi_full_property *); 611 void scanImmediate(const struct tgsi_full_immediate *); 612 613 inline bool isEdgeFlagPassthrough(const Instruction&) const; 614 }; 615 616 Source::Source(struct nv50_ir_prog_info *prog) : info(prog) 617 { 618 tokens = (const struct tgsi_token *)info->bin.source; 619 620 if (prog->dbgFlags & NV50_IR_DEBUG_BASIC) 621 tgsi_dump(tokens, 0); 622 623 samplerViewTargets = NULL; 624 625 mainTempsInLMem = FALSE; 626 } 627 628 Source::~Source() 629 { 630 if (insns) 631 FREE(insns); 632 633 if (info->immd.data) 634 FREE(info->immd.data); 635 if (info->immd.type) 636 FREE(info->immd.type); 637 638 if (samplerViewTargets) 639 delete[] samplerViewTargets; 640 } 641 642 bool Source::scanSource() 643 { 644 unsigned insnCount = 0; 645 struct tgsi_parse_context parse; 646 647 tgsi_scan_shader(tokens, &scan); 648 649 insns = (struct tgsi_full_instruction *)MALLOC(scan.num_instructions * 650 sizeof(insns[0])); 651 if (!insns) 652 return false; 653 654 clipVertexOutput = -1; 655 656 samplerViewCount = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; 657 samplerViewTargets = new uint8_t[samplerViewCount]; 658 659 info->immd.bufSize = 0; 660 tempArrayCount = 0; 661 immdArrayCount = 0; 662 663 info->numInputs = scan.file_max[TGSI_FILE_INPUT] + 1; 664 info->numOutputs = scan.file_max[TGSI_FILE_OUTPUT] + 1; 665 info->numSysVals = scan.file_max[TGSI_FILE_SYSTEM_VALUE] + 1; 666 667 if (info->type == PIPE_SHADER_FRAGMENT) { 668 info->prop.fp.writesDepth = scan.writes_z; 669 info->prop.fp.usesDiscard = scan.uses_kill; 670 } else 671 if (info->type == PIPE_SHADER_GEOMETRY) { 672 info->prop.gp.instanceCount = 1; // default value 673 } 674 675 info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16); 676 info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte)); 677 678 tgsi_parse_init(&parse, tokens); 679 while (!tgsi_parse_end_of_tokens(&parse)) { 680 tgsi_parse_token(&parse); 681 682 switch (parse.FullToken.Token.Type) { 683 case TGSI_TOKEN_TYPE_IMMEDIATE: 684 scanImmediate(&parse.FullToken.FullImmediate); 685 break; 686 case TGSI_TOKEN_TYPE_DECLARATION: 687 scanDeclaration(&parse.FullToken.FullDeclaration); 688 break; 689 case TGSI_TOKEN_TYPE_INSTRUCTION: 690 insns[insnCount++] = parse.FullToken.FullInstruction; 691 scanInstruction(&parse.FullToken.FullInstruction); 692 break; 693 case TGSI_TOKEN_TYPE_PROPERTY: 694 scanProperty(&parse.FullToken.FullProperty); 695 break; 696 default: 697 INFO("unknown TGSI token type: %d\n", parse.FullToken.Token.Type); 698 break; 699 } 700 } 701 tgsi_parse_free(&parse); 702 703 if (mainTempsInLMem) 704 info->bin.tlsSpace += (scan.file_max[TGSI_FILE_TEMPORARY] + 1) * 16; 705 706 if (info->io.genUserClip > 0) { 707 info->io.clipDistanceMask = (1 << info->io.genUserClip) - 1; 708 709 for (unsigned int n = 0; n < ((info->io.genUserClip + 3) / 4); ++n) { 710 unsigned int i = info->numOutputs++; 711 info->out[i].id = i; 712 info->out[i].sn = TGSI_SEMANTIC_CLIPDIST; 713 info->out[i].si = n; 714 info->out[i].mask = info->io.clipDistanceMask >> (n * 4); 715 } 716 } 717 718 return info->assignSlots(info) == 0; 719 } 720 721 void Source::scanProperty(const struct tgsi_full_property *prop) 722 { 723 switch (prop->Property.PropertyName) { 724 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 725 info->prop.gp.outputPrim = prop->u[0].Data; 726 break; 727 case TGSI_PROPERTY_GS_INPUT_PRIM: 728 info->prop.gp.inputPrim = prop->u[0].Data; 729 break; 730 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 731 info->prop.gp.maxVertices = prop->u[0].Data; 732 break; 733 #if 0 734 case TGSI_PROPERTY_GS_INSTANCE_COUNT: 735 info->prop.gp.instanceCount = prop->u[0].Data; 736 break; 737 #endif 738 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 739 info->prop.fp.separateFragData = TRUE; 740 break; 741 case TGSI_PROPERTY_FS_COORD_ORIGIN: 742 case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER: 743 // we don't care 744 break; 745 case TGSI_PROPERTY_VS_PROHIBIT_UCPS: 746 info->io.genUserClip = -1; 747 break; 748 default: 749 INFO("unhandled TGSI property %d\n", prop->Property.PropertyName); 750 break; 751 } 752 } 753 754 void Source::scanImmediate(const struct tgsi_full_immediate *imm) 755 { 756 const unsigned n = info->immd.count++; 757 758 assert(n < scan.immediate_count); 759 760 for (int c = 0; c < 4; ++c) 761 info->immd.data[n * 4 + c] = imm->u[c].Uint; 762 763 info->immd.type[n] = imm->Immediate.DataType; 764 } 765 766 int Source::inferSysValDirection(unsigned sn) const 767 { 768 switch (sn) { 769 case TGSI_SEMANTIC_INSTANCEID: 770 case TGSI_SEMANTIC_VERTEXID: 771 return 1; 772 #if 0 773 case TGSI_SEMANTIC_LAYER: 774 case TGSI_SEMANTIC_VIEWPORTINDEX: 775 return 0; 776 #endif 777 case TGSI_SEMANTIC_PRIMID: 778 return (info->type == PIPE_SHADER_FRAGMENT) ? 1 : 0; 779 default: 780 return 0; 781 } 782 } 783 784 bool Source::scanDeclaration(const struct tgsi_full_declaration *decl) 785 { 786 unsigned i; 787 unsigned sn = TGSI_SEMANTIC_GENERIC; 788 unsigned si = 0; 789 const unsigned first = decl->Range.First, last = decl->Range.Last; 790 791 if (decl->Declaration.Semantic) { 792 sn = decl->Semantic.Name; 793 si = decl->Semantic.Index; 794 } 795 796 switch (decl->Declaration.File) { 797 case TGSI_FILE_INPUT: 798 if (info->type == PIPE_SHADER_VERTEX) { 799 // all vertex attributes are equal 800 for (i = first; i <= last; ++i) { 801 info->in[i].sn = TGSI_SEMANTIC_GENERIC; 802 info->in[i].si = i; 803 } 804 } else { 805 for (i = first; i <= last; ++i, ++si) { 806 info->in[i].id = i; 807 info->in[i].sn = sn; 808 info->in[i].si = si; 809 if (info->type == PIPE_SHADER_FRAGMENT) { 810 // translate interpolation mode 811 switch (decl->Interp.Interpolate) { 812 case TGSI_INTERPOLATE_CONSTANT: 813 info->in[i].flat = 1; 814 break; 815 case TGSI_INTERPOLATE_COLOR: 816 info->in[i].sc = 1; 817 break; 818 case TGSI_INTERPOLATE_LINEAR: 819 info->in[i].linear = 1; 820 break; 821 default: 822 break; 823 } 824 if (decl->Interp.Centroid) 825 info->in[i].centroid = 1; 826 } 827 } 828 } 829 break; 830 case TGSI_FILE_OUTPUT: 831 for (i = first; i <= last; ++i, ++si) { 832 switch (sn) { 833 case TGSI_SEMANTIC_POSITION: 834 if (info->type == PIPE_SHADER_FRAGMENT) 835 info->io.fragDepth = i; 836 else 837 if (clipVertexOutput < 0) 838 clipVertexOutput = i; 839 break; 840 case TGSI_SEMANTIC_COLOR: 841 if (info->type == PIPE_SHADER_FRAGMENT) 842 info->prop.fp.numColourResults++; 843 break; 844 case TGSI_SEMANTIC_EDGEFLAG: 845 info->io.edgeFlagOut = i; 846 break; 847 case TGSI_SEMANTIC_CLIPVERTEX: 848 clipVertexOutput = i; 849 break; 850 case TGSI_SEMANTIC_CLIPDIST: 851 info->io.clipDistanceMask |= 852 decl->Declaration.UsageMask << (si * 4); 853 info->io.genUserClip = -1; 854 break; 855 default: 856 break; 857 } 858 info->out[i].id = i; 859 info->out[i].sn = sn; 860 info->out[i].si = si; 861 } 862 break; 863 case TGSI_FILE_SYSTEM_VALUE: 864 switch (sn) { 865 case TGSI_SEMANTIC_INSTANCEID: 866 info->io.instanceId = first; 867 break; 868 case TGSI_SEMANTIC_VERTEXID: 869 info->io.vertexId = first; 870 break; 871 default: 872 break; 873 } 874 for (i = first; i <= last; ++i, ++si) { 875 info->sv[i].sn = sn; 876 info->sv[i].si = si; 877 info->sv[i].input = inferSysValDirection(sn); 878 } 879 break; 880 case TGSI_FILE_SAMPLER_VIEW: 881 for (i = first; i <= last; ++i) 882 samplerViewTargets[i] = decl->SamplerView.Resource; 883 break; 884 case TGSI_FILE_IMMEDIATE_ARRAY: 885 { 886 if (decl->Dim.Index2D >= immdArrayCount) 887 immdArrayCount = decl->Dim.Index2D + 1; 888 immdArrays[decl->Dim.Index2D].u32 = (last + 1) << 2; 889 int c; 890 uint32_t base, count; 891 switch (decl->Declaration.UsageMask) { 892 case 0x1: c = 1; break; 893 case 0x3: c = 2; break; 894 default: 895 c = 4; 896 break; 897 } 898 immdArrays[decl->Dim.Index2D].u32 |= c; 899 count = (last + 1) * c; 900 base = info->immd.bufSize / 4; 901 info->immd.bufSize = (info->immd.bufSize + count * 4 + 0xf) & ~0xf; 902 info->immd.buf = (uint32_t *)REALLOC(info->immd.buf, base * 4, 903 info->immd.bufSize); 904 // NOTE: this assumes array declarations are ordered by Dim.Index2D 905 for (i = 0; i < count; ++i) 906 info->immd.buf[base + i] = decl->ImmediateData.u[i].Uint; 907 } 908 break; 909 case TGSI_FILE_TEMPORARY_ARRAY: 910 { 911 if (decl->Dim.Index2D >= tempArrayCount) 912 tempArrayCount = decl->Dim.Index2D + 1; 913 tempArrays[decl->Dim.Index2D].u32 = (last + 1) << 2; 914 int c; 915 uint32_t count; 916 switch (decl->Declaration.UsageMask) { 917 case 0x1: c = 1; break; 918 case 0x3: c = 2; break; 919 default: 920 c = 4; 921 break; 922 } 923 tempArrays[decl->Dim.Index2D].u32 |= c; 924 count = (last + 1) * c; 925 info->bin.tlsSpace += (info->bin.tlsSpace + count * 4 + 0xf) & ~0xf; 926 } 927 break; 928 case TGSI_FILE_NULL: 929 case TGSI_FILE_TEMPORARY: 930 case TGSI_FILE_ADDRESS: 931 case TGSI_FILE_CONSTANT: 932 case TGSI_FILE_IMMEDIATE: 933 case TGSI_FILE_PREDICATE: 934 case TGSI_FILE_SAMPLER: 935 break; 936 default: 937 ERROR("unhandled TGSI_FILE %d\n", decl->Declaration.File); 938 return false; 939 } 940 return true; 941 } 942 943 inline bool Source::isEdgeFlagPassthrough(const Instruction& insn) const 944 { 945 return insn.getOpcode() == TGSI_OPCODE_MOV && 946 insn.getDst(0).getIndex(0) == info->io.edgeFlagOut && 947 insn.getSrc(0).getFile() == TGSI_FILE_INPUT; 948 } 949 950 bool Source::scanInstruction(const struct tgsi_full_instruction *inst) 951 { 952 Instruction insn(inst); 953 954 if (insn.dstCount()) { 955 if (insn.getDst(0).getFile() == TGSI_FILE_OUTPUT) { 956 Instruction::DstRegister dst = insn.getDst(0); 957 958 if (dst.isIndirect(0)) 959 for (unsigned i = 0; i < info->numOutputs; ++i) 960 info->out[i].mask = 0xf; 961 else 962 info->out[dst.getIndex(0)].mask |= dst.getMask(); 963 964 if (info->out[dst.getIndex(0)].sn == TGSI_SEMANTIC_PSIZE) 965 info->out[dst.getIndex(0)].mask &= 1; 966 967 if (isEdgeFlagPassthrough(insn)) 968 info->io.edgeFlagIn = insn.getSrc(0).getIndex(0); 969 } else 970 if (insn.getDst(0).getFile() == TGSI_FILE_TEMPORARY) { 971 if (insn.getDst(0).isIndirect(0)) 972 mainTempsInLMem = TRUE; 973 } 974 } 975 976 for (unsigned s = 0; s < insn.srcCount(); ++s) { 977 Instruction::SrcRegister src = insn.getSrc(s); 978 if (src.getFile() == TGSI_FILE_TEMPORARY) 979 if (src.isIndirect(0)) 980 mainTempsInLMem = TRUE; 981 if (src.getFile() != TGSI_FILE_INPUT) 982 continue; 983 unsigned mask = insn.srcMask(s); 984 985 if (src.isIndirect(0)) { 986 for (unsigned i = 0; i < info->numInputs; ++i) 987 info->in[i].mask = 0xf; 988 } else { 989 for (unsigned c = 0; c < 4; ++c) { 990 if (!(mask & (1 << c))) 991 continue; 992 int k = src.getSwizzle(c); 993 int i = src.getIndex(0); 994 if (info->in[i].sn != TGSI_SEMANTIC_FOG || k == TGSI_SWIZZLE_X) 995 if (k <= TGSI_SWIZZLE_W) 996 info->in[i].mask |= 1 << k; 997 } 998 } 999 } 1000 return true; 1001 } 1002 1003 nv50_ir::TexInstruction::Target 1004 Instruction::getTexture(const tgsi::Source *code, int s) const 1005 { 1006 switch (getSrc(s).getFile()) { 1007 case TGSI_FILE_SAMPLER_VIEW: { 1008 // XXX: indirect access 1009 unsigned int r = getSrc(s).getIndex(0); 1010 assert(r < code->samplerViewCount); 1011 return translateTexture(code->samplerViewTargets[r]); 1012 } 1013 default: 1014 return translateTexture(insn->Texture.Texture); 1015 } 1016 } 1017 1018 } // namespace tgsi 1019 1020 namespace { 1021 1022 using namespace nv50_ir; 1023 1024 class Converter : public BuildUtil 1025 { 1026 public: 1027 Converter(Program *, const tgsi::Source *); 1028 ~Converter(); 1029 1030 bool run(); 1031 1032 private: 1033 struct Subroutine 1034 { 1035 Subroutine(Function *f) : f(f) { } 1036 Function *f; 1037 ValueMap values; 1038 }; 1039 1040 Value *getVertexBase(int s); 1041 DataArray *getArrayForFile(unsigned file, int idx); 1042 Value *fetchSrc(int s, int c); 1043 Value *acquireDst(int d, int c); 1044 void storeDst(int d, int c, Value *); 1045 1046 Value *fetchSrc(const tgsi::Instruction::SrcRegister src, int c, Value *ptr); 1047 void storeDst(const tgsi::Instruction::DstRegister dst, int c, 1048 Value *val, Value *ptr); 1049 1050 Value *applySrcMod(Value *, int s, int c); 1051 1052 Symbol *makeSym(uint file, int fileIndex, int idx, int c, uint32_t addr); 1053 Symbol *srcToSym(tgsi::Instruction::SrcRegister, int c); 1054 Symbol *dstToSym(tgsi::Instruction::DstRegister, int c); 1055 1056 bool handleInstruction(const struct tgsi_full_instruction *); 1057 void exportOutputs(); 1058 inline Subroutine *getSubroutine(unsigned ip); 1059 inline Subroutine *getSubroutine(Function *); 1060 inline bool isEndOfSubroutine(uint ip); 1061 1062 void loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask); 1063 1064 // R,S,L,C,Dx,Dy encode TGSI sources for respective values (0xSf for auto) 1065 void setTexRS(TexInstruction *, unsigned int& s, int R, int S); 1066 void handleTEX(Value *dst0[4], int R, int S, int L, int C, int Dx, int Dy); 1067 void handleTXF(Value *dst0[4], int R); 1068 void handleTXQ(Value *dst0[4], enum TexQuery); 1069 void handleLIT(Value *dst0[4]); 1070 void handleUserClipPlanes(); 1071 1072 Value *interpolate(tgsi::Instruction::SrcRegister, int c, Value *ptr); 1073 1074 void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); 1075 1076 Value *buildDot(int dim); 1077 1078 class BindArgumentsPass : public Pass { 1079 public: 1080 BindArgumentsPass(Converter &conv) : conv(conv) { } 1081 1082 private: 1083 Converter &conv; 1084 Subroutine *sub; 1085 1086 template<typename T> inline void 1087 updateCallArgs(Instruction *i, void (Instruction::*setArg)(int, Value *), 1088 T (Function::*proto)); 1089 1090 template<typename T> inline void 1091 updatePrototype(BitSet *set, void (Function::*updateSet)(), 1092 T (Function::*proto)); 1093 1094 protected: 1095 bool visit(Function *); 1096 bool visit(BasicBlock *bb) { return false; } 1097 }; 1098 1099 private: 1100 const struct tgsi::Source *code; 1101 const struct nv50_ir_prog_info *info; 1102 1103 struct { 1104 std::map<unsigned, Subroutine> map; 1105 Subroutine *cur; 1106 } sub; 1107 1108 uint ip; // instruction pointer 1109 1110 tgsi::Instruction tgsi; 1111 1112 DataType dstTy; 1113 DataType srcTy; 1114 1115 DataArray tData; // TGSI_FILE_TEMPORARY 1116 DataArray aData; // TGSI_FILE_ADDRESS 1117 DataArray pData; // TGSI_FILE_PREDICATE 1118 DataArray oData; // TGSI_FILE_OUTPUT (if outputs in registers) 1119 std::vector<DataArray> lData; // TGSI_FILE_TEMPORARY_ARRAY 1120 std::vector<DataArray> iData; // TGSI_FILE_IMMEDIATE_ARRAY 1121 1122 Value *zero; 1123 Value *fragCoord[4]; 1124 Value *clipVtx[4]; 1125 1126 Value *vtxBase[5]; // base address of vertex in primitive (for TP/GP) 1127 uint8_t vtxBaseValid; 1128 1129 Stack condBBs; // fork BB, then else clause BB 1130 Stack joinBBs; // fork BB, for inserting join ops on ENDIF 1131 Stack loopBBs; // loop headers 1132 Stack breakBBs; // end of / after loop 1133 }; 1134 1135 Symbol * 1136 Converter::srcToSym(tgsi::Instruction::SrcRegister src, int c) 1137 { 1138 const int swz = src.getSwizzle(c); 1139 1140 return makeSym(src.getFile(), 1141 src.is2D() ? src.getIndex(1) : 0, 1142 src.isIndirect(0) ? -1 : src.getIndex(0), swz, 1143 src.getIndex(0) * 16 + swz * 4); 1144 } 1145 1146 Symbol * 1147 Converter::dstToSym(tgsi::Instruction::DstRegister dst, int c) 1148 { 1149 return makeSym(dst.getFile(), 1150 dst.is2D() ? dst.getIndex(1) : 0, 1151 dst.isIndirect(0) ? -1 : dst.getIndex(0), c, 1152 dst.getIndex(0) * 16 + c * 4); 1153 } 1154 1155 Symbol * 1156 Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address) 1157 { 1158 Symbol *sym = new_Symbol(prog, tgsi::translateFile(tgsiFile)); 1159 1160 sym->reg.fileIndex = fileIdx; 1161 1162 if (idx >= 0) { 1163 if (sym->reg.file == FILE_SHADER_INPUT) 1164 sym->setOffset(info->in[idx].slot[c] * 4); 1165 else 1166 if (sym->reg.file == FILE_SHADER_OUTPUT) 1167 sym->setOffset(info->out[idx].slot[c] * 4); 1168 else 1169 if (sym->reg.file == FILE_SYSTEM_VALUE) 1170 sym->setSV(tgsi::translateSysVal(info->sv[idx].sn), c); 1171 else 1172 sym->setOffset(address); 1173 } else { 1174 sym->setOffset(address); 1175 } 1176 return sym; 1177 } 1178 1179 static inline uint8_t 1180 translateInterpMode(const struct nv50_ir_varying *var, operation& op) 1181 { 1182 uint8_t mode = NV50_IR_INTERP_PERSPECTIVE; 1183 1184 if (var->flat) 1185 mode = NV50_IR_INTERP_FLAT; 1186 else 1187 if (var->linear) 1188 mode = NV50_IR_INTERP_LINEAR; 1189 else 1190 if (var->sc) 1191 mode = NV50_IR_INTERP_SC; 1192 1193 op = (mode == NV50_IR_INTERP_PERSPECTIVE || mode == NV50_IR_INTERP_SC) 1194 ? OP_PINTERP : OP_LINTERP; 1195 1196 if (var->centroid) 1197 mode |= NV50_IR_INTERP_CENTROID; 1198 1199 return mode; 1200 } 1201 1202 Value * 1203 Converter::interpolate(tgsi::Instruction::SrcRegister src, int c, Value *ptr) 1204 { 1205 operation op; 1206 1207 // XXX: no way to know interpolation mode if we don't know what's accessed 1208 const uint8_t mode = translateInterpMode(&info->in[ptr ? 0 : 1209 src.getIndex(0)], op); 1210 1211 Instruction *insn = new_Instruction(func, op, TYPE_F32); 1212 1213 insn->setDef(0, getScratch()); 1214 insn->setSrc(0, srcToSym(src, c)); 1215 if (op == OP_PINTERP) 1216 insn->setSrc(1, fragCoord[3]); 1217 if (ptr) 1218 insn->setIndirect(0, 0, ptr); 1219 1220 insn->setInterpolate(mode); 1221 1222 bb->insertTail(insn); 1223 return insn->getDef(0); 1224 } 1225 1226 Value * 1227 Converter::applySrcMod(Value *val, int s, int c) 1228 { 1229 Modifier m = tgsi.getSrc(s).getMod(c); 1230 DataType ty = tgsi.inferSrcType(); 1231 1232 if (m & Modifier(NV50_IR_MOD_ABS)) 1233 val = mkOp1v(OP_ABS, ty, getScratch(), val); 1234 1235 if (m & Modifier(NV50_IR_MOD_NEG)) 1236 val = mkOp1v(OP_NEG, ty, getScratch(), val); 1237 1238 return val; 1239 } 1240 1241 Value * 1242 Converter::getVertexBase(int s) 1243 { 1244 assert(s < 5); 1245 if (!(vtxBaseValid & (1 << s))) { 1246 const int index = tgsi.getSrc(s).getIndex(1); 1247 Value *rel = NULL; 1248 if (tgsi.getSrc(s).isIndirect(1)) 1249 rel = fetchSrc(tgsi.getSrc(s).getIndirect(1), 0, NULL); 1250 vtxBaseValid |= 1 << s; 1251 vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(index), rel); 1252 } 1253 return vtxBase[s]; 1254 } 1255 1256 Value * 1257 Converter::fetchSrc(int s, int c) 1258 { 1259 Value *res; 1260 Value *ptr = NULL, *dimRel = NULL; 1261 1262 tgsi::Instruction::SrcRegister src = tgsi.getSrc(s); 1263 1264 if (src.isIndirect(0)) 1265 ptr = fetchSrc(src.getIndirect(0), 0, NULL); 1266 1267 if (src.is2D()) { 1268 switch (src.getFile()) { 1269 case TGSI_FILE_INPUT: 1270 dimRel = getVertexBase(s); 1271 break; 1272 case TGSI_FILE_CONSTANT: 1273 // on NVC0, this is valid and c{I+J}[k] == cI[(J << 16) + k] 1274 if (src.isIndirect(1)) 1275 dimRel = fetchSrc(src.getIndirect(1), 0, 0); 1276 break; 1277 default: 1278 break; 1279 } 1280 } 1281 1282 res = fetchSrc(src, c, ptr); 1283 1284 if (dimRel) 1285 res->getInsn()->setIndirect(0, 1, dimRel); 1286 1287 return applySrcMod(res, s, c); 1288 } 1289 1290 Converter::DataArray * 1291 Converter::getArrayForFile(unsigned file, int idx) 1292 { 1293 switch (file) { 1294 case TGSI_FILE_TEMPORARY: 1295 return &tData; 1296 case TGSI_FILE_PREDICATE: 1297 return &pData; 1298 case TGSI_FILE_ADDRESS: 1299 return &aData; 1300 case TGSI_FILE_TEMPORARY_ARRAY: 1301 assert(idx < code->tempArrayCount); 1302 return &lData[idx]; 1303 case TGSI_FILE_IMMEDIATE_ARRAY: 1304 assert(idx < code->immdArrayCount); 1305 return &iData[idx]; 1306 case TGSI_FILE_OUTPUT: 1307 assert(prog->getType() == Program::TYPE_FRAGMENT); 1308 return &oData; 1309 default: 1310 assert(!"invalid/unhandled TGSI source file"); 1311 return NULL; 1312 } 1313 } 1314 1315 Value * 1316 Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr) 1317 { 1318 const int idx2d = src.is2D() ? src.getIndex(1) : 0; 1319 const int idx = src.getIndex(0); 1320 const int swz = src.getSwizzle(c); 1321 1322 switch (src.getFile()) { 1323 case TGSI_FILE_IMMEDIATE: 1324 assert(!ptr); 1325 return loadImm(NULL, info->immd.data[idx * 4 + swz]); 1326 case TGSI_FILE_CONSTANT: 1327 return mkLoad(TYPE_U32, srcToSym(src, c), ptr); 1328 case TGSI_FILE_INPUT: 1329 if (prog->getType() == Program::TYPE_FRAGMENT) { 1330 // don't load masked inputs, won't be assigned a slot 1331 if (!ptr && !(info->in[idx].mask & (1 << swz))) 1332 return loadImm(NULL, swz == TGSI_SWIZZLE_W ? 1.0f : 0.0f); 1333 if (!ptr && info->in[idx].sn == TGSI_SEMANTIC_FACE) 1334 return mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_FACE, 0)); 1335 return interpolate(src, c, ptr); 1336 } 1337 return mkLoad(TYPE_U32, srcToSym(src, c), ptr); 1338 case TGSI_FILE_OUTPUT: 1339 assert(!"load from output file"); 1340 return NULL; 1341 case TGSI_FILE_SYSTEM_VALUE: 1342 assert(!ptr); 1343 return mkOp1v(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c)); 1344 default: 1345 return getArrayForFile(src.getFile(), idx2d)->load( 1346 sub.cur->values, idx, swz, ptr); 1347 } 1348 } 1349 1350 Value * 1351 Converter::acquireDst(int d, int c) 1352 { 1353 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); 1354 const unsigned f = dst.getFile(); 1355 const int idx = dst.getIndex(0); 1356 const int idx2d = dst.is2D() ? dst.getIndex(1) : 0; 1357 1358 if (dst.isMasked(c) || f == TGSI_FILE_RESOURCE) 1359 return NULL; 1360 1361 if (dst.isIndirect(0) || 1362 f == TGSI_FILE_TEMPORARY_ARRAY || 1363 f == TGSI_FILE_SYSTEM_VALUE || 1364 (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT)) 1365 return getScratch(); 1366 1367 return getArrayForFile(f, idx2d)-> acquire(sub.cur->values, idx, c); 1368 } 1369 1370 void 1371 Converter::storeDst(int d, int c, Value *val) 1372 { 1373 const tgsi::Instruction::DstRegister dst = tgsi.getDst(d); 1374 1375 switch (tgsi.getSaturate()) { 1376 case TGSI_SAT_NONE: 1377 break; 1378 case TGSI_SAT_ZERO_ONE: 1379 mkOp1(OP_SAT, dstTy, val, val); 1380 break; 1381 case TGSI_SAT_MINUS_PLUS_ONE: 1382 mkOp2(OP_MAX, dstTy, val, val, mkImm(-1.0f)); 1383 mkOp2(OP_MIN, dstTy, val, val, mkImm(+1.0f)); 1384 break; 1385 default: 1386 assert(!"invalid saturation mode"); 1387 break; 1388 } 1389 1390 Value *ptr = dst.isIndirect(0) ? 1391 fetchSrc(dst.getIndirect(0), 0, NULL) : NULL; 1392 1393 if (info->io.genUserClip > 0 && 1394 dst.getFile() == TGSI_FILE_OUTPUT && 1395 !dst.isIndirect(0) && dst.getIndex(0) == code->clipVertexOutput) { 1396 mkMov(clipVtx[c], val); 1397 val = clipVtx[c]; 1398 } 1399 1400 storeDst(dst, c, val, ptr); 1401 } 1402 1403 void 1404 Converter::storeDst(const tgsi::Instruction::DstRegister dst, int c, 1405 Value *val, Value *ptr) 1406 { 1407 const unsigned f = dst.getFile(); 1408 const int idx = dst.getIndex(0); 1409 const int idx2d = dst.is2D() ? dst.getIndex(1) : 0; 1410 1411 if (f == TGSI_FILE_SYSTEM_VALUE) { 1412 assert(!ptr); 1413 mkOp2(OP_WRSV, TYPE_U32, NULL, dstToSym(dst, c), val); 1414 } else 1415 if (f == TGSI_FILE_OUTPUT && prog->getType() != Program::TYPE_FRAGMENT) { 1416 if (ptr || (info->out[idx].mask & (1 << c))) 1417 mkStore(OP_EXPORT, TYPE_U32, dstToSym(dst, c), ptr, val); 1418 } else 1419 if (f == TGSI_FILE_TEMPORARY || 1420 f == TGSI_FILE_TEMPORARY_ARRAY || 1421 f == TGSI_FILE_PREDICATE || 1422 f == TGSI_FILE_ADDRESS || 1423 f == TGSI_FILE_OUTPUT) { 1424 getArrayForFile(f, idx2d)->store(sub.cur->values, idx, c, ptr, val); 1425 } else { 1426 assert(!"invalid dst file"); 1427 } 1428 } 1429 1430 #define FOR_EACH_DST_ENABLED_CHANNEL(d, chan, inst) \ 1431 for (chan = 0; chan < 4; ++chan) \ 1432 if (!inst.getDst(d).isMasked(chan)) 1433 1434 Value * 1435 Converter::buildDot(int dim) 1436 { 1437 assert(dim > 0); 1438 1439 Value *src0 = fetchSrc(0, 0), *src1 = fetchSrc(1, 0); 1440 Value *dotp = getScratch(); 1441 1442 mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1); 1443 1444 for (int c = 1; c < dim; ++c) { 1445 src0 = fetchSrc(0, c); 1446 src1 = fetchSrc(1, c); 1447 mkOp3(OP_MAD, TYPE_F32, dotp, src0, src1, dotp); 1448 } 1449 return dotp; 1450 } 1451 1452 void 1453 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork) 1454 { 1455 FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL); 1456 join->fixed = 1; 1457 conv->insertHead(join); 1458 1459 fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv); 1460 fork->insertBefore(fork->getExit(), fork->joinAt); 1461 } 1462 1463 void 1464 Converter::setTexRS(TexInstruction *tex, unsigned int& s, int R, int S) 1465 { 1466 unsigned rIdx = 0, sIdx = 0; 1467 1468 if (R >= 0) 1469 rIdx = tgsi.getSrc(R).getIndex(0); 1470 if (S >= 0) 1471 sIdx = tgsi.getSrc(S).getIndex(0); 1472 1473 tex->setTexture(tgsi.getTexture(code, R), rIdx, sIdx); 1474 1475 if (tgsi.getSrc(R).isIndirect(0)) { 1476 tex->tex.rIndirectSrc = s; 1477 tex->setSrc(s++, fetchSrc(tgsi.getSrc(R).getIndirect(0), 0, NULL)); 1478 } 1479 if (S >= 0 && tgsi.getSrc(S).isIndirect(0)) { 1480 tex->tex.sIndirectSrc = s; 1481 tex->setSrc(s++, fetchSrc(tgsi.getSrc(S).getIndirect(0), 0, NULL)); 1482 } 1483 } 1484 1485 void 1486 Converter::handleTXQ(Value *dst0[4], enum TexQuery query) 1487 { 1488 TexInstruction *tex = new_TexInstruction(func, OP_TXQ); 1489 tex->tex.query = query; 1490 unsigned int c, d; 1491 1492 for (d = 0, c = 0; c < 4; ++c) { 1493 if (!dst0[c]) 1494 continue; 1495 tex->tex.mask |= 1 << c; 1496 tex->setDef(d++, dst0[c]); 1497 } 1498 tex->setSrc((c = 0), fetchSrc(0, 0)); // mip level 1499 1500 setTexRS(tex, c, 1, -1); 1501 1502 bb->insertTail(tex); 1503 } 1504 1505 void 1506 Converter::loadProjTexCoords(Value *dst[4], Value *src[4], unsigned int mask) 1507 { 1508 Value *proj = fetchSrc(0, 3); 1509 Instruction *insn = proj->getUniqueInsn(); 1510 int c; 1511 1512 if (insn->op == OP_PINTERP) { 1513 bb->insertTail(insn = cloneForward(func, insn)); 1514 insn->op = OP_LINTERP; 1515 insn->setInterpolate(NV50_IR_INTERP_LINEAR | insn->getSampleMode()); 1516 insn->setSrc(1, NULL); 1517 proj = insn->getDef(0); 1518 } 1519 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), proj); 1520 1521 for (c = 0; c < 4; ++c) { 1522 if (!(mask & (1 << c))) 1523 continue; 1524 if ((insn = src[c]->getUniqueInsn())->op != OP_PINTERP) 1525 continue; 1526 mask &= ~(1 << c); 1527 1528 bb->insertTail(insn = cloneForward(func, insn)); 1529 insn->setInterpolate(NV50_IR_INTERP_PERSPECTIVE | insn->getSampleMode()); 1530 insn->setSrc(1, proj); 1531 dst[c] = insn->getDef(0); 1532 } 1533 if (!mask) 1534 return; 1535 1536 proj = mkOp1v(OP_RCP, TYPE_F32, getSSA(), fetchSrc(0, 3)); 1537 1538 for (c = 0; c < 4; ++c) 1539 if (mask & (1 << c)) 1540 dst[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), src[c], proj); 1541 } 1542 1543 // order of nv50 ir sources: x y z layer lod/bias shadow 1544 // order of TGSI TEX sources: x y z layer shadow lod/bias 1545 // lowering will finally set the hw specific order (like array first on nvc0) 1546 void 1547 Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy) 1548 { 1549 Value *val; 1550 Value *arg[4], *src[8]; 1551 Value *lod = NULL, *shd = NULL; 1552 unsigned int s, c, d; 1553 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); 1554 1555 TexInstruction::Target tgt = tgsi.getTexture(code, R); 1556 1557 for (s = 0; s < tgt.getArgCount(); ++s) 1558 arg[s] = src[s] = fetchSrc(0, s); 1559 1560 if (texi->op == OP_TXL || texi->op == OP_TXB) 1561 lod = fetchSrc(L >> 4, L & 3); 1562 1563 if (C == 0x0f) 1564 C = 0x00 | MAX2(tgt.getArgCount(), 2); // guess DC src 1565 1566 if (tgt.isShadow()) 1567 shd = fetchSrc(C >> 4, C & 3); 1568 1569 if (texi->op == OP_TXD) { 1570 for (c = 0; c < tgt.getDim(); ++c) { 1571 texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c)); 1572 texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c)); 1573 } 1574 } 1575 1576 // cube textures don't care about projection value, it's divided out 1577 if (tgsi.getOpcode() == TGSI_OPCODE_TXP && !tgt.isCube() && !tgt.isArray()) { 1578 unsigned int n = tgt.getDim(); 1579 if (shd) { 1580 arg[n] = shd; 1581 ++n; 1582 assert(tgt.getDim() == tgt.getArgCount()); 1583 } 1584 loadProjTexCoords(src, arg, (1 << n) - 1); 1585 if (shd) 1586 shd = src[n - 1]; 1587 } 1588 1589 if (tgt.isCube()) { 1590 for (c = 0; c < 3; ++c) 1591 src[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]); 1592 val = getScratch(); 1593 mkOp2(OP_MAX, TYPE_F32, val, src[0], src[1]); 1594 mkOp2(OP_MAX, TYPE_F32, val, src[2], val); 1595 mkOp1(OP_RCP, TYPE_F32, val, val); 1596 for (c = 0; c < 3; ++c) 1597 src[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val); 1598 } 1599 1600 for (c = 0, d = 0; c < 4; ++c) { 1601 if (dst[c]) { 1602 texi->setDef(d++, dst[c]); 1603 texi->tex.mask |= 1 << c; 1604 } else { 1605 // NOTE: maybe hook up def too, for CSE 1606 } 1607 } 1608 for (s = 0; s < tgt.getArgCount(); ++s) 1609 texi->setSrc(s, src[s]); 1610 if (lod) 1611 texi->setSrc(s++, lod); 1612 if (shd) 1613 texi->setSrc(s++, shd); 1614 1615 setTexRS(texi, s, R, S); 1616 1617 if (tgsi.getOpcode() == TGSI_OPCODE_SAMPLE_C_LZ) 1618 texi->tex.levelZero = true; 1619 1620 bb->insertTail(texi); 1621 } 1622 1623 // 1st source: xyz = coordinates, w = lod 1624 // 2nd source: offset 1625 void 1626 Converter::handleTXF(Value *dst[4], int R) 1627 { 1628 TexInstruction *texi = new_TexInstruction(func, tgsi.getOP()); 1629 unsigned int c, d, s; 1630 1631 texi->tex.target = tgsi.getTexture(code, R); 1632 1633 for (c = 0, d = 0; c < 4; ++c) { 1634 if (dst[c]) { 1635 texi->setDef(d++, dst[c]); 1636 texi->tex.mask |= 1 << c; 1637 } 1638 } 1639 for (c = 0; c < texi->tex.target.getArgCount(); ++c) 1640 texi->setSrc(c, fetchSrc(0, c)); 1641 texi->setSrc(c++, fetchSrc(0, 3)); // lod 1642 1643 setTexRS(texi, c, R, -1); 1644 1645 for (s = 0; s < tgsi.getNumTexOffsets(); ++s) { 1646 for (c = 0; c < 3; ++c) { 1647 texi->tex.offset[s][c] = tgsi.getTexOffset(s).getValueU32(c, info); 1648 if (texi->tex.offset[s][c]) 1649 texi->tex.useOffsets = s + 1; 1650 } 1651 } 1652 1653 bb->insertTail(texi); 1654 } 1655 1656 void 1657 Converter::handleLIT(Value *dst0[4]) 1658 { 1659 Value *val0 = NULL; 1660 unsigned int mask = tgsi.getDst(0).getMask(); 1661 1662 if (mask & (1 << 0)) 1663 loadImm(dst0[0], 1.0f); 1664 1665 if (mask & (1 << 3)) 1666 loadImm(dst0[3], 1.0f); 1667 1668 if (mask & (3 << 1)) { 1669 val0 = getScratch(); 1670 mkOp2(OP_MAX, TYPE_F32, val0, fetchSrc(0, 0), zero); 1671 if (mask & (1 << 1)) 1672 mkMov(dst0[1], val0); 1673 } 1674 1675 if (mask & (1 << 2)) { 1676 Value *src1 = fetchSrc(0, 1), *src3 = fetchSrc(0, 3); 1677 Value *val1 = getScratch(), *val3 = getScratch(); 1678 1679 Value *pos128 = loadImm(NULL, +127.999999f); 1680 Value *neg128 = loadImm(NULL, -127.999999f); 1681 1682 mkOp2(OP_MAX, TYPE_F32, val1, src1, zero); 1683 mkOp2(OP_MAX, TYPE_F32, val3, src3, neg128); 1684 mkOp2(OP_MIN, TYPE_F32, val3, val3, pos128); 1685 mkOp2(OP_POW, TYPE_F32, val3, val1, val3); 1686 1687 mkCmp(OP_SLCT, CC_GT, TYPE_F32, dst0[2], val3, zero, val0); 1688 } 1689 } 1690 1691 Converter::Subroutine * 1692 Converter::getSubroutine(unsigned ip) 1693 { 1694 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip); 1695 1696 if (it == sub.map.end()) 1697 it = sub.map.insert(std::make_pair( 1698 ip, Subroutine(new Function(prog, "SUB", ip)))).first; 1699 1700 return &it->second; 1701 } 1702 1703 Converter::Subroutine * 1704 Converter::getSubroutine(Function *f) 1705 { 1706 unsigned ip = f->getLabel(); 1707 std::map<unsigned, Subroutine>::iterator it = sub.map.find(ip); 1708 1709 if (it == sub.map.end()) 1710 it = sub.map.insert(std::make_pair(ip, Subroutine(f))).first; 1711 1712 return &it->second; 1713 } 1714 1715 bool 1716 Converter::isEndOfSubroutine(uint ip) 1717 { 1718 assert(ip < code->scan.num_instructions); 1719 tgsi::Instruction insn(&code->insns[ip]); 1720 return (insn.getOpcode() == TGSI_OPCODE_END || 1721 insn.getOpcode() == TGSI_OPCODE_ENDSUB || 1722 // does END occur at end of main or the very end ? 1723 insn.getOpcode() == TGSI_OPCODE_BGNSUB); 1724 } 1725 1726 bool 1727 Converter::handleInstruction(const struct tgsi_full_instruction *insn) 1728 { 1729 Value *dst0[4], *rDst0[4]; 1730 Value *src0, *src1, *src2; 1731 Value *val0, *val1; 1732 int c; 1733 1734 tgsi = tgsi::Instruction(insn); 1735 1736 bool useScratchDst = tgsi.checkDstSrcAliasing(); 1737 1738 operation op = tgsi.getOP(); 1739 dstTy = tgsi.inferDstType(); 1740 srcTy = tgsi.inferSrcType(); 1741 1742 unsigned int mask = tgsi.dstCount() ? tgsi.getDst(0).getMask() : 0; 1743 1744 if (tgsi.dstCount()) { 1745 for (c = 0; c < 4; ++c) { 1746 rDst0[c] = acquireDst(0, c); 1747 dst0[c] = (useScratchDst && rDst0[c]) ? getScratch() : rDst0[c]; 1748 } 1749 } 1750 1751 switch (tgsi.getOpcode()) { 1752 case TGSI_OPCODE_ADD: 1753 case TGSI_OPCODE_UADD: 1754 case TGSI_OPCODE_AND: 1755 case TGSI_OPCODE_DIV: 1756 case TGSI_OPCODE_IDIV: 1757 case TGSI_OPCODE_UDIV: 1758 case TGSI_OPCODE_MAX: 1759 case TGSI_OPCODE_MIN: 1760 case TGSI_OPCODE_IMAX: 1761 case TGSI_OPCODE_IMIN: 1762 case TGSI_OPCODE_UMAX: 1763 case TGSI_OPCODE_UMIN: 1764 case TGSI_OPCODE_MOD: 1765 case TGSI_OPCODE_UMOD: 1766 case TGSI_OPCODE_MUL: 1767 case TGSI_OPCODE_UMUL: 1768 case TGSI_OPCODE_OR: 1769 case TGSI_OPCODE_POW: 1770 case TGSI_OPCODE_SHL: 1771 case TGSI_OPCODE_ISHR: 1772 case TGSI_OPCODE_USHR: 1773 case TGSI_OPCODE_SUB: 1774 case TGSI_OPCODE_XOR: 1775 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 1776 src0 = fetchSrc(0, c); 1777 src1 = fetchSrc(1, c); 1778 mkOp2(op, dstTy, dst0[c], src0, src1); 1779 } 1780 break; 1781 case TGSI_OPCODE_MAD: 1782 case TGSI_OPCODE_UMAD: 1783 case TGSI_OPCODE_SAD: 1784 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 1785 src0 = fetchSrc(0, c); 1786 src1 = fetchSrc(1, c); 1787 src2 = fetchSrc(2, c); 1788 mkOp3(op, dstTy, dst0[c], src0, src1, src2); 1789 } 1790 break; 1791 case TGSI_OPCODE_MOV: 1792 case TGSI_OPCODE_ABS: 1793 case TGSI_OPCODE_CEIL: 1794 case TGSI_OPCODE_FLR: 1795 case TGSI_OPCODE_TRUNC: 1796 case TGSI_OPCODE_RCP: 1797 case TGSI_OPCODE_IABS: 1798 case TGSI_OPCODE_INEG: 1799 case TGSI_OPCODE_NOT: 1800 case TGSI_OPCODE_DDX: 1801 case TGSI_OPCODE_DDY: 1802 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 1803 mkOp1(op, dstTy, dst0[c], fetchSrc(0, c)); 1804 break; 1805 case TGSI_OPCODE_RSQ: 1806 src0 = fetchSrc(0, 0); 1807 val0 = getScratch(); 1808 mkOp1(OP_ABS, TYPE_F32, val0, src0); 1809 mkOp1(OP_RSQ, TYPE_F32, val0, val0); 1810 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 1811 mkMov(dst0[c], val0); 1812 break; 1813 case TGSI_OPCODE_ARL: 1814 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 1815 src0 = fetchSrc(0, c); 1816 mkCvt(OP_CVT, TYPE_S32, dst0[c], TYPE_F32, src0)->rnd = ROUND_M; 1817 mkOp2(OP_SHL, TYPE_U32, dst0[c], dst0[c], mkImm(4)); 1818 } 1819 break; 1820 case TGSI_OPCODE_UARL: 1821 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 1822 mkOp2(OP_SHL, TYPE_U32, dst0[c], fetchSrc(0, c), mkImm(4)); 1823 break; 1824 case TGSI_OPCODE_EX2: 1825 case TGSI_OPCODE_LG2: 1826 val0 = mkOp1(op, TYPE_F32, getScratch(), fetchSrc(0, 0))->getDef(0); 1827 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 1828 mkOp1(OP_MOV, TYPE_F32, dst0[c], val0); 1829 break; 1830 case TGSI_OPCODE_COS: 1831 case TGSI_OPCODE_SIN: 1832 val0 = getScratch(); 1833 if (mask & 7) { 1834 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 0)); 1835 mkOp1(op, TYPE_F32, val0, val0); 1836 for (c = 0; c < 3; ++c) 1837 if (dst0[c]) 1838 mkMov(dst0[c], val0); 1839 } 1840 if (dst0[3]) { 1841 mkOp1(OP_PRESIN, TYPE_F32, val0, fetchSrc(0, 3)); 1842 mkOp1(op, TYPE_F32, dst0[3], val0); 1843 } 1844 break; 1845 case TGSI_OPCODE_SCS: 1846 if (mask & 3) { 1847 val0 = mkOp1v(OP_PRESIN, TYPE_F32, getSSA(), fetchSrc(0, 0)); 1848 if (dst0[0]) 1849 mkOp1(OP_COS, TYPE_F32, dst0[0], val0); 1850 if (dst0[1]) 1851 mkOp1(OP_SIN, TYPE_F32, dst0[1], val0); 1852 } 1853 if (dst0[2]) 1854 loadImm(dst0[2], 0.0f); 1855 if (dst0[3]) 1856 loadImm(dst0[3], 1.0f); 1857 break; 1858 case TGSI_OPCODE_EXP: 1859 src0 = fetchSrc(0, 0); 1860 val0 = mkOp1v(OP_FLOOR, TYPE_F32, getSSA(), src0); 1861 if (dst0[1]) 1862 mkOp2(OP_SUB, TYPE_F32, dst0[1], src0, val0); 1863 if (dst0[0]) 1864 mkOp1(OP_EX2, TYPE_F32, dst0[0], val0); 1865 if (dst0[2]) 1866 mkOp1(OP_EX2, TYPE_F32, dst0[2], src0); 1867 if (dst0[3]) 1868 loadImm(dst0[3], 1.0f); 1869 break; 1870 case TGSI_OPCODE_LOG: 1871 src0 = mkOp1v(OP_ABS, TYPE_F32, getSSA(), fetchSrc(0, 0)); 1872 val0 = mkOp1v(OP_LG2, TYPE_F32, dst0[2] ? dst0[2] : getSSA(), src0); 1873 if (dst0[0] || dst0[1]) 1874 val1 = mkOp1v(OP_FLOOR, TYPE_F32, dst0[0] ? dst0[0] : getSSA(), val0); 1875 if (dst0[1]) { 1876 mkOp1(OP_EX2, TYPE_F32, dst0[1], val1); 1877 mkOp1(OP_RCP, TYPE_F32, dst0[1], dst0[1]); 1878 mkOp2(OP_MUL, TYPE_F32, dst0[1], dst0[1], src0); 1879 } 1880 if (dst0[3]) 1881 loadImm(dst0[3], 1.0f); 1882 break; 1883 case TGSI_OPCODE_DP2: 1884 val0 = buildDot(2); 1885 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 1886 mkMov(dst0[c], val0); 1887 break; 1888 case TGSI_OPCODE_DP3: 1889 val0 = buildDot(3); 1890 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 1891 mkMov(dst0[c], val0); 1892 break; 1893 case TGSI_OPCODE_DP4: 1894 val0 = buildDot(4); 1895 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 1896 mkMov(dst0[c], val0); 1897 break; 1898 case TGSI_OPCODE_DPH: 1899 val0 = buildDot(3); 1900 src1 = fetchSrc(1, 3); 1901 mkOp2(OP_ADD, TYPE_F32, val0, val0, src1); 1902 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 1903 mkMov(dst0[c], val0); 1904 break; 1905 case TGSI_OPCODE_DST: 1906 if (dst0[0]) 1907 loadImm(dst0[0], 1.0f); 1908 if (dst0[1]) { 1909 src0 = fetchSrc(0, 1); 1910 src1 = fetchSrc(1, 1); 1911 mkOp2(OP_MUL, TYPE_F32, dst0[1], src0, src1); 1912 } 1913 if (dst0[2]) 1914 mkMov(dst0[2], fetchSrc(0, 2)); 1915 if (dst0[3]) 1916 mkMov(dst0[3], fetchSrc(1, 3)); 1917 break; 1918 case TGSI_OPCODE_LRP: 1919 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 1920 src0 = fetchSrc(0, c); 1921 src1 = fetchSrc(1, c); 1922 src2 = fetchSrc(2, c); 1923 mkOp3(OP_MAD, TYPE_F32, dst0[c], 1924 mkOp2v(OP_SUB, TYPE_F32, getSSA(), src1, src2), src0, src2); 1925 } 1926 break; 1927 case TGSI_OPCODE_LIT: 1928 handleLIT(dst0); 1929 break; 1930 case TGSI_OPCODE_XPD: 1931 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 1932 if (c < 3) { 1933 val0 = getSSA(); 1934 src0 = fetchSrc(1, (c + 1) % 3); 1935 src1 = fetchSrc(0, (c + 2) % 3); 1936 mkOp2(OP_MUL, TYPE_F32, val0, src0, src1); 1937 mkOp1(OP_NEG, TYPE_F32, val0, val0); 1938 1939 src0 = fetchSrc(0, (c + 1) % 3); 1940 src1 = fetchSrc(1, (c + 2) % 3); 1941 mkOp3(OP_MAD, TYPE_F32, dst0[c], src0, src1, val0); 1942 } else { 1943 loadImm(dst0[c], 1.0f); 1944 } 1945 } 1946 break; 1947 case TGSI_OPCODE_ISSG: 1948 case TGSI_OPCODE_SSG: 1949 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 1950 src0 = fetchSrc(0, c); 1951 val0 = getScratch(); 1952 val1 = getScratch(); 1953 mkCmp(OP_SET, CC_GT, srcTy, val0, src0, zero); 1954 mkCmp(OP_SET, CC_LT, srcTy, val1, src0, zero); 1955 if (srcTy == TYPE_F32) 1956 mkOp2(OP_SUB, TYPE_F32, dst0[c], val0, val1); 1957 else 1958 mkOp2(OP_SUB, TYPE_S32, dst0[c], val1, val0); 1959 } 1960 break; 1961 case TGSI_OPCODE_UCMP: 1962 case TGSI_OPCODE_CMP: 1963 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 1964 src0 = fetchSrc(0, c); 1965 src1 = fetchSrc(1, c); 1966 src2 = fetchSrc(2, c); 1967 if (src1 == src2) 1968 mkMov(dst0[c], src1); 1969 else 1970 mkCmp(OP_SLCT, (srcTy == TYPE_F32) ? CC_LT : CC_NE, 1971 srcTy, dst0[c], src1, src2, src0); 1972 } 1973 break; 1974 case TGSI_OPCODE_FRC: 1975 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 1976 src0 = fetchSrc(0, c); 1977 val0 = getScratch(); 1978 mkOp1(OP_FLOOR, TYPE_F32, val0, src0); 1979 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val0); 1980 } 1981 break; 1982 case TGSI_OPCODE_ROUND: 1983 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 1984 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_F32, fetchSrc(0, c)) 1985 ->rnd = ROUND_NI; 1986 break; 1987 case TGSI_OPCODE_CLAMP: 1988 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 1989 src0 = fetchSrc(0, c); 1990 src1 = fetchSrc(1, c); 1991 src2 = fetchSrc(2, c); 1992 val0 = getScratch(); 1993 mkOp2(OP_MIN, TYPE_F32, val0, src0, src1); 1994 mkOp2(OP_MAX, TYPE_F32, dst0[c], val0, src2); 1995 } 1996 break; 1997 case TGSI_OPCODE_SLT: 1998 case TGSI_OPCODE_SGE: 1999 case TGSI_OPCODE_SEQ: 2000 case TGSI_OPCODE_SFL: 2001 case TGSI_OPCODE_SGT: 2002 case TGSI_OPCODE_SLE: 2003 case TGSI_OPCODE_SNE: 2004 case TGSI_OPCODE_STR: 2005 case TGSI_OPCODE_ISGE: 2006 case TGSI_OPCODE_ISLT: 2007 case TGSI_OPCODE_USEQ: 2008 case TGSI_OPCODE_USGE: 2009 case TGSI_OPCODE_USLT: 2010 case TGSI_OPCODE_USNE: 2011 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) { 2012 src0 = fetchSrc(0, c); 2013 src1 = fetchSrc(1, c); 2014 mkCmp(op, tgsi.getSetCond(), dstTy, dst0[c], src0, src1); 2015 } 2016 break; 2017 case TGSI_OPCODE_KIL: 2018 val0 = new_LValue(func, FILE_PREDICATE); 2019 for (c = 0; c < 4; ++c) { 2020 mkCmp(OP_SET, CC_LT, TYPE_F32, val0, fetchSrc(0, c), zero); 2021 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate(CC_P, val0); 2022 } 2023 break; 2024 case TGSI_OPCODE_KILP: 2025 mkOp(OP_DISCARD, TYPE_NONE, NULL); 2026 break; 2027 case TGSI_OPCODE_TEX: 2028 case TGSI_OPCODE_TXB: 2029 case TGSI_OPCODE_TXL: 2030 case TGSI_OPCODE_TXP: 2031 // R S L C Dx Dy 2032 handleTEX(dst0, 1, 1, 0x03, 0x0f, 0x00, 0x00); 2033 break; 2034 case TGSI_OPCODE_TXD: 2035 handleTEX(dst0, 3, 3, 0x03, 0x0f, 0x10, 0x20); 2036 break; 2037 case TGSI_OPCODE_SAMPLE: 2038 case TGSI_OPCODE_SAMPLE_B: 2039 case TGSI_OPCODE_SAMPLE_D: 2040 case TGSI_OPCODE_SAMPLE_L: 2041 case TGSI_OPCODE_SAMPLE_C: 2042 case TGSI_OPCODE_SAMPLE_C_LZ: 2043 handleTEX(dst0, 1, 2, 0x30, 0x31, 0x40, 0x50); 2044 break; 2045 case TGSI_OPCODE_TXF: 2046 case TGSI_OPCODE_LOAD: 2047 handleTXF(dst0, 1); 2048 break; 2049 case TGSI_OPCODE_TXQ: 2050 case TGSI_OPCODE_SVIEWINFO: 2051 handleTXQ(dst0, TXQ_DIMS); 2052 break; 2053 case TGSI_OPCODE_F2I: 2054 case TGSI_OPCODE_F2U: 2055 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 2056 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c))->rnd = ROUND_Z; 2057 break; 2058 case TGSI_OPCODE_I2F: 2059 case TGSI_OPCODE_U2F: 2060 FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) 2061 mkCvt(OP_CVT, dstTy, dst0[c], srcTy, fetchSrc(0, c)); 2062 break; 2063 case TGSI_OPCODE_EMIT: 2064 case TGSI_OPCODE_ENDPRIM: 2065 // get vertex stream if specified (must be immediate) 2066 src0 = tgsi.srcCount() ? 2067 mkImm(tgsi.getSrc(0).getValueU32(0, info)) : zero; 2068 mkOp1(op, TYPE_U32, NULL, src0)->fixed = 1; 2069 break; 2070 case TGSI_OPCODE_IF: 2071 { 2072 BasicBlock *ifBB = new BasicBlock(func); 2073 2074 bb->cfg.attach(&ifBB->cfg, Graph::Edge::TREE); 2075 condBBs.push(bb); 2076 joinBBs.push(bb); 2077 2078 mkFlow(OP_BRA, NULL, CC_NOT_P, fetchSrc(0, 0)); 2079 2080 setPosition(ifBB, true); 2081 } 2082 break; 2083 case TGSI_OPCODE_ELSE: 2084 { 2085 BasicBlock *elseBB = new BasicBlock(func); 2086 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); 2087 2088 forkBB->cfg.attach(&elseBB->cfg, Graph::Edge::TREE); 2089 condBBs.push(bb); 2090 2091 forkBB->getExit()->asFlow()->target.bb = elseBB; 2092 if (!bb->isTerminated()) 2093 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL); 2094 2095 setPosition(elseBB, true); 2096 } 2097 break; 2098 case TGSI_OPCODE_ENDIF: 2099 { 2100 BasicBlock *convBB = new BasicBlock(func); 2101 BasicBlock *prevBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); 2102 BasicBlock *forkBB = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p); 2103 2104 if (!bb->isTerminated()) { 2105 // we only want join if none of the clauses ended with CONT/BREAK/RET 2106 if (prevBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6) 2107 insertConvergenceOps(convBB, forkBB); 2108 mkFlow(OP_BRA, convBB, CC_ALWAYS, NULL); 2109 bb->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); 2110 } 2111 2112 if (prevBB->getExit()->op == OP_BRA) { 2113 prevBB->cfg.attach(&convBB->cfg, Graph::Edge::FORWARD); 2114 prevBB->getExit()->asFlow()->target.bb = convBB; 2115 } 2116 setPosition(convBB, true); 2117 } 2118 break; 2119 case TGSI_OPCODE_BGNLOOP: 2120 { 2121 BasicBlock *lbgnBB = new BasicBlock(func); 2122 BasicBlock *lbrkBB = new BasicBlock(func); 2123 2124 loopBBs.push(lbgnBB); 2125 breakBBs.push(lbrkBB); 2126 if (loopBBs.getSize() > func->loopNestingBound) 2127 func->loopNestingBound++; 2128 2129 mkFlow(OP_PREBREAK, lbrkBB, CC_ALWAYS, NULL); 2130 2131 bb->cfg.attach(&lbgnBB->cfg, Graph::Edge::TREE); 2132 setPosition(lbgnBB, true); 2133 mkFlow(OP_PRECONT, lbgnBB, CC_ALWAYS, NULL); 2134 } 2135 break; 2136 case TGSI_OPCODE_ENDLOOP: 2137 { 2138 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p); 2139 2140 if (!bb->isTerminated()) { 2141 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); 2142 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); 2143 } 2144 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true); 2145 } 2146 break; 2147 case TGSI_OPCODE_BRK: 2148 { 2149 if (bb->isTerminated()) 2150 break; 2151 BasicBlock *brkBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p); 2152 mkFlow(OP_BREAK, brkBB, CC_ALWAYS, NULL); 2153 bb->cfg.attach(&brkBB->cfg, Graph::Edge::CROSS); 2154 } 2155 break; 2156 case TGSI_OPCODE_CONT: 2157 { 2158 if (bb->isTerminated()) 2159 break; 2160 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p); 2161 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL); 2162 contBB->explicitCont = true; 2163 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); 2164 } 2165 break; 2166 case TGSI_OPCODE_BGNSUB: 2167 { 2168 Subroutine *s = getSubroutine(ip); 2169 BasicBlock *entry = new BasicBlock(s->f); 2170 BasicBlock *leave = new BasicBlock(s->f); 2171 2172 // multiple entrypoints possible, keep the graph connected 2173 if (prog->getType() == Program::TYPE_COMPUTE) 2174 prog->main->call.attach(&s->f->call, Graph::Edge::TREE); 2175 2176 sub.cur = s; 2177 s->f->setEntry(entry); 2178 s->f->setExit(leave); 2179 setPosition(entry, true); 2180 return true; 2181 } 2182 case TGSI_OPCODE_ENDSUB: 2183 { 2184 sub.cur = getSubroutine(prog->main); 2185 setPosition(BasicBlock::get(sub.cur->f->cfg.getRoot()), true); 2186 return true; 2187 } 2188 case TGSI_OPCODE_CAL: 2189 { 2190 Subroutine *s = getSubroutine(tgsi.getLabel()); 2191 mkFlow(OP_CALL, s->f, CC_ALWAYS, NULL); 2192 func->call.attach(&s->f->call, Graph::Edge::TREE); 2193 return true; 2194 } 2195 case TGSI_OPCODE_RET: 2196 { 2197 if (bb->isTerminated()) 2198 return true; 2199 BasicBlock *leave = BasicBlock::get(func->cfgExit); 2200 2201 if (!isEndOfSubroutine(ip + 1)) { 2202 // insert a PRERET at the entry if this is an early return 2203 // (only needed for sharing code in the epilogue) 2204 BasicBlock *pos = getBB(); 2205 setPosition(BasicBlock::get(func->cfg.getRoot()), false); 2206 mkFlow(OP_PRERET, leave, CC_ALWAYS, NULL)->fixed = 1; 2207 setPosition(pos, true); 2208 } 2209 mkFlow(OP_RET, NULL, CC_ALWAYS, NULL)->fixed = 1; 2210 bb->cfg.attach(&leave->cfg, Graph::Edge::CROSS); 2211 } 2212 break; 2213 case TGSI_OPCODE_END: 2214 { 2215 // attach and generate epilogue code 2216 BasicBlock *epilogue = BasicBlock::get(func->cfgExit); 2217 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE); 2218 setPosition(epilogue, true); 2219 if (prog->getType() == Program::TYPE_FRAGMENT) 2220 exportOutputs(); 2221 if (info->io.genUserClip > 0) 2222 handleUserClipPlanes(); 2223 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; 2224 } 2225 break; 2226 case TGSI_OPCODE_SWITCH: 2227 case TGSI_OPCODE_CASE: 2228 ERROR("switch/case opcode encountered, should have been lowered\n"); 2229 abort(); 2230 break; 2231 default: 2232 ERROR("unhandled TGSI opcode: %u\n", tgsi.getOpcode()); 2233 assert(0); 2234 break; 2235 } 2236 2237 if (tgsi.dstCount()) { 2238 for (c = 0; c < 4; ++c) { 2239 if (!dst0[c]) 2240 continue; 2241 if (dst0[c] != rDst0[c]) 2242 mkMov(rDst0[c], dst0[c]); 2243 storeDst(0, c, rDst0[c]); 2244 } 2245 } 2246 vtxBaseValid = 0; 2247 2248 return true; 2249 } 2250 2251 void 2252 Converter::handleUserClipPlanes() 2253 { 2254 Value *res[8]; 2255 int n, i, c; 2256 2257 for (c = 0; c < 4; ++c) { 2258 for (i = 0; i < info->io.genUserClip; ++i) { 2259 Symbol *sym = mkSymbol(FILE_MEMORY_CONST, info->io.ucpBinding, 2260 TYPE_F32, info->io.ucpBase + i * 16 + c * 4); 2261 Value *ucp = mkLoad(TYPE_F32, sym, NULL); 2262 if (c == 0) 2263 res[i] = mkOp2v(OP_MUL, TYPE_F32, getScratch(), clipVtx[c], ucp); 2264 else 2265 mkOp3(OP_MAD, TYPE_F32, res[i], clipVtx[c], ucp, res[i]); 2266 } 2267 } 2268 2269 const int first = info->numOutputs - (info->io.genUserClip + 3) / 4; 2270 2271 for (i = 0; i < info->io.genUserClip; ++i) { 2272 n = i / 4 + first; 2273 c = i % 4; 2274 Symbol *sym = 2275 mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, info->out[n].slot[c] * 4); 2276 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, res[i]); 2277 } 2278 } 2279 2280 void 2281 Converter::exportOutputs() 2282 { 2283 for (unsigned int i = 0; i < info->numOutputs; ++i) { 2284 for (unsigned int c = 0; c < 4; ++c) { 2285 if (!oData.exists(sub.cur->values, i, c)) 2286 continue; 2287 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, 2288 info->out[i].slot[c] * 4); 2289 Value *val = oData.load(sub.cur->values, i, c, NULL); 2290 if (val) 2291 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); 2292 } 2293 } 2294 } 2295 2296 Converter::Converter(Program *ir, const tgsi::Source *code) : BuildUtil(ir), 2297 code(code), 2298 tgsi(NULL), 2299 tData(this), aData(this), pData(this), oData(this) 2300 { 2301 info = code->info; 2302 2303 const DataFile tFile = code->mainTempsInLMem ? FILE_MEMORY_LOCAL : FILE_GPR; 2304 2305 const unsigned tSize = code->fileSize(TGSI_FILE_TEMPORARY); 2306 const unsigned pSize = code->fileSize(TGSI_FILE_PREDICATE); 2307 const unsigned aSize = code->fileSize(TGSI_FILE_ADDRESS); 2308 const unsigned oSize = code->fileSize(TGSI_FILE_OUTPUT); 2309 2310 tData.setup(TGSI_FILE_TEMPORARY, 0, 0, tSize, 4, 4, tFile, 0); 2311 pData.setup(TGSI_FILE_PREDICATE, 0, 0, pSize, 4, 4, FILE_PREDICATE, 0); 2312 aData.setup(TGSI_FILE_ADDRESS, 0, 0, aSize, 4, 4, FILE_ADDRESS, 0); 2313 oData.setup(TGSI_FILE_OUTPUT, 0, 0, oSize, 4, 4, FILE_GPR, 0); 2314 2315 for (int vol = 0, i = 0; i < code->tempArrayCount; ++i) { 2316 int len = code->tempArrays[i].u32 >> 2; 2317 int dim = code->tempArrays[i].u32 & 3; 2318 2319 lData.push_back(DataArray(this)); 2320 lData.back().setup(TGSI_FILE_TEMPORARY_ARRAY, i, vol, len, dim, 4, 2321 FILE_MEMORY_LOCAL, 0); 2322 2323 vol += (len * dim * 4 + 0xf) & ~0xf; 2324 } 2325 2326 for (int vol = 0, i = 0; i < code->immdArrayCount; ++i) { 2327 int len = code->immdArrays[i].u32 >> 2; 2328 int dim = code->immdArrays[i].u32 & 3; 2329 2330 lData.push_back(DataArray(this)); 2331 lData.back().setup(TGSI_FILE_IMMEDIATE_ARRAY, i, vol, len, dim, 4, 2332 FILE_MEMORY_CONST, 14); 2333 2334 vol += (len * dim * 4 + 0xf) & ~0xf; 2335 } 2336 2337 zero = mkImm((uint32_t)0); 2338 2339 vtxBaseValid = 0; 2340 } 2341 2342 Converter::~Converter() 2343 { 2344 } 2345 2346 template<typename T> inline void 2347 Converter::BindArgumentsPass::updateCallArgs( 2348 Instruction *i, void (Instruction::*setArg)(int, Value *), 2349 T (Function::*proto)) 2350 { 2351 Function *g = i->asFlow()->target.fn; 2352 Subroutine *subg = conv.getSubroutine(g); 2353 2354 for (unsigned a = 0; a < (g->*proto).size(); ++a) { 2355 Value *v = (g->*proto)[a].get(); 2356 const Converter::Location &l = subg->values.l.find(v)->second; 2357 Converter::DataArray *array = conv.getArrayForFile(l.array, l.arrayIdx); 2358 2359 (i->*setArg)(a, array->acquire(sub->values, l.i, l.c)); 2360 } 2361 } 2362 2363 template<typename T> inline void 2364 Converter::BindArgumentsPass::updatePrototype( 2365 BitSet *set, void (Function::*updateSet)(), T (Function::*proto)) 2366 { 2367 (func->*updateSet)(); 2368 2369 for (unsigned i = 0; i < set->getSize(); ++i) { 2370 Value *v = func->getLValue(i); 2371 2372 // only include values with a matching TGSI register 2373 if (set->test(i) && sub->values.l.find(v) != sub->values.l.end()) 2374 (func->*proto).push_back(v); 2375 } 2376 } 2377 2378 bool 2379 Converter::BindArgumentsPass::visit(Function *f) 2380 { 2381 sub = conv.getSubroutine(f); 2382 2383 for (ArrayList::Iterator bi = f->allBBlocks.iterator(); 2384 !bi.end(); bi.next()) { 2385 for (Instruction *i = BasicBlock::get(bi)->getFirst(); 2386 i; i = i->next) { 2387 if (i->op == OP_CALL && !i->asFlow()->builtin) { 2388 updateCallArgs(i, &Instruction::setSrc, &Function::ins); 2389 updateCallArgs(i, &Instruction::setDef, &Function::outs); 2390 } 2391 } 2392 } 2393 2394 if (func == prog->main && prog->getType() != Program::TYPE_COMPUTE) 2395 return true; 2396 updatePrototype(&BasicBlock::get(f->cfg.getRoot())->liveSet, 2397 &Function::buildLiveSets, &Function::ins); 2398 updatePrototype(&BasicBlock::get(f->cfgExit)->defSet, 2399 &Function::buildDefSets, &Function::outs); 2400 2401 return true; 2402 } 2403 2404 bool 2405 Converter::run() 2406 { 2407 BasicBlock *entry = new BasicBlock(prog->main); 2408 BasicBlock *leave = new BasicBlock(prog->main); 2409 2410 prog->main->setEntry(entry); 2411 prog->main->setExit(leave); 2412 2413 setPosition(entry, true); 2414 sub.cur = getSubroutine(prog->main); 2415 2416 if (info->io.genUserClip > 0) { 2417 for (int c = 0; c < 4; ++c) 2418 clipVtx[c] = getScratch(); 2419 } 2420 2421 if (prog->getType() == Program::TYPE_FRAGMENT) { 2422 Symbol *sv = mkSysVal(SV_POSITION, 3); 2423 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); 2424 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); 2425 } 2426 2427 for (ip = 0; ip < code->scan.num_instructions; ++ip) { 2428 if (!handleInstruction(&code->insns[ip])) 2429 return false; 2430 } 2431 2432 if (!BindArgumentsPass(*this).run(prog)) 2433 return false; 2434 2435 return true; 2436 } 2437 2438 } // unnamed namespace 2439 2440 namespace nv50_ir { 2441 2442 bool 2443 Program::makeFromTGSI(struct nv50_ir_prog_info *info) 2444 { 2445 tgsi::Source src(info); 2446 if (!src.scanSource()) 2447 return false; 2448 tlsSize = info->bin.tlsSpace; 2449 2450 Converter builder(this, &src); 2451 return builder.run(); 2452 } 2453 2454 } // namespace nv50_ir 2455