1 2 #include "nv50_ir.h" 3 #include "nv50_ir_target.h" 4 #include "nv50_ir_build_util.h" 5 6 #include "nv50_ir_from_sm4.h" 7 8 // WTF: pass-through is implicit ??? check ReadWriteMask 9 10 namespace tgsi { 11 12 static nv50_ir::SVSemantic irSemantic(unsigned sn) 13 { 14 switch (sn) { 15 case TGSI_SEMANTIC_POSITION: return nv50_ir::SV_POSITION; 16 case TGSI_SEMANTIC_FACE: return nv50_ir::SV_FACE; 17 case NV50_SEMANTIC_LAYER: return nv50_ir::SV_LAYER; 18 case NV50_SEMANTIC_VIEWPORTINDEX: return nv50_ir::SV_VIEWPORT_INDEX; 19 case TGSI_SEMANTIC_PSIZE: return nv50_ir::SV_POINT_SIZE; 20 case NV50_SEMANTIC_CLIPDISTANCE: return nv50_ir::SV_CLIP_DISTANCE; 21 case TGSI_SEMANTIC_VERTEXID: return nv50_ir::SV_VERTEX_ID; 22 case TGSI_SEMANTIC_INSTANCEID: return nv50_ir::SV_INSTANCE_ID; 23 case TGSI_SEMANTIC_PRIMID: return nv50_ir::SV_PRIMITIVE_ID; 24 case NV50_SEMANTIC_TESSFACTOR: return nv50_ir::SV_TESS_FACTOR; 25 case NV50_SEMANTIC_TESSCOORD: return nv50_ir::SV_TESS_COORD; 26 default: 27 return nv50_ir::SV_UNDEFINED; 28 } 29 } 30 31 } // namespace tgsi 32 33 namespace { 34 35 using namespace nv50_ir; 36 37 #define NV50_IR_MAX_RESOURCES 64 38 39 class Converter : public BuildUtil 40 { 41 public: 42 Converter(Program *, struct nv50_ir_prog_info *); 43 ~Converter(); 44 45 private: 46 DataArray tData32; 47 DataArray tData64; 48 unsigned int nrRegVals; 49 50 DataArray *lData; 51 unsigned int nrArrays; 52 unsigned int arrayVol; 53 54 DataArray oData; 55 56 uint8_t interpMode[PIPE_MAX_SHADER_INPUTS]; 57 58 // outputs for each phase 59 struct nv50_ir_varying out[3][PIPE_MAX_SHADER_OUTPUTS]; 60 61 int phase; 62 int subPhaseCnt[2]; 63 int subPhase; 64 unsigned int phaseStart; 65 unsigned int phaseInstance; 66 unsigned int *phaseInstCnt[2]; 67 bool unrollPhase; 68 bool phaseInstanceUsed; 69 int phaseEnded; // (phase + 1) if $phase ended 70 71 bool finalized; 72 73 Value *srcPtr[3][3]; // for indirect addressing, save pointer values 74 Value *dstPtr[3]; 75 Value *vtxBase[3]; // base address of vertex in a primitive (TP/GP) 76 77 Value *domainPt[3]; // pre-fetched TessCoord 78 79 unsigned int nDstOpnds; 80 81 Stack condBBs; 82 Stack joinBBs; 83 Stack loopBBs; 84 Stack breakBBs; 85 Stack entryBBs; 86 Stack leaveBBs; 87 Stack retIPs; 88 89 bool shadow[NV50_IR_MAX_RESOURCES]; 90 TexTarget resourceType[NV50_IR_MAX_RESOURCES][2]; 91 92 struct nv50_ir_prog_info& info; 93 94 Value *fragCoord[4]; 95 96 public: 97 bool run(); 98 99 private: 100 bool handleInstruction(unsigned int pos); 101 bool inspectInstruction(unsigned int pos); 102 bool handleDeclaration(const sm4_dcl& dcl); 103 bool inspectDeclaration(const sm4_dcl& dcl); 104 bool parseSignature(); 105 106 bool haveNextPhase(unsigned int pos) const; 107 108 void allocateValues(); 109 void exportOutputs(); 110 111 void emitTex(Value *dst0[4], TexInstruction *, const uint8_t swizzle[4]); 112 void handleLOAD(Value *dst0[4]); 113 void handleSAMPLE(operation, Value *dst0[4]); 114 void handleQUERY(Value *dst0[4], enum TexQuery query); 115 void handleDP(Value *dst0[4], int dim); 116 117 Symbol *iSym(int i, int c); 118 Symbol *oSym(int i, int c); 119 120 Value *src(int i, int c); 121 Value *src(const sm4_op&, int c, int i); 122 Value *dst(int i, int c); 123 Value *dst(const sm4_op&, int c, int i); 124 void saveDst(int i, int c, Value *value); 125 void saveDst(const sm4_op&, int c, Value *value, int i); 126 void saveFragDepth(operation op, Value *value); 127 128 Value *interpolate(const sm4_op&, int c, int i); 129 130 Value *getSrcPtr(int s, int dim, int shl); 131 Value *getDstPtr(int d, int dim, int shl); 132 Value *getVtxPtr(int s); 133 134 bool checkDstSrcAliasing() const; 135 void insertConvergenceOps(BasicBlock *conv, BasicBlock *fork); 136 void finalizeShader(); 137 138 operation cvtOpcode(enum sm4_opcode op) const; 139 unsigned int getDstOpndCount(enum sm4_opcode opcode) const; 140 141 DataType inferSrcType(enum sm4_opcode op) const; 142 DataType inferDstType(enum sm4_opcode op) const; 143 144 unsigned g3dPrim(const unsigned prim, unsigned *patchSize = NULL) const; 145 CondCode cvtCondCode(enum sm4_opcode op) const; 146 RoundMode cvtRoundingMode(enum sm4_opcode op) const; 147 TexTarget cvtTexTarget(enum sm4_target, 148 enum sm4_opcode, operation *) const; 149 SVSemantic cvtSemantic(enum sm4_sv, uint8_t &index) const; 150 uint8_t cvtInterpMode(enum sm4_interpolation) const; 151 152 unsigned tgsiSemantic(SVSemantic, int index); 153 void recordSV(unsigned sn, unsigned si, unsigned mask, bool input); 154 155 private: 156 sm4_insn *insn; 157 DataType dTy, sTy; 158 159 const struct sm4_program& sm4; 160 Program *prog; 161 }; 162 163 #define PRIM_CASE(a, b) \ 164 case D3D_PRIMITIVE_TOPOLOGY_##a: return PIPE_PRIM_##b; 165 166 unsigned 167 Converter::g3dPrim(const unsigned prim, unsigned *patchSize) const 168 { 169 switch (prim) { 170 PRIM_CASE(UNDEFINED, POINTS); 171 PRIM_CASE(POINTLIST, POINTS); 172 PRIM_CASE(LINELIST, LINES); 173 PRIM_CASE(LINESTRIP, LINE_STRIP); 174 PRIM_CASE(TRIANGLELIST, TRIANGLES); 175 PRIM_CASE(TRIANGLESTRIP, TRIANGLE_STRIP); 176 PRIM_CASE(LINELIST_ADJ, LINES_ADJACENCY); 177 PRIM_CASE(LINESTRIP_ADJ, LINE_STRIP_ADJACENCY); 178 PRIM_CASE(TRIANGLELIST_ADJ, TRIANGLES_ADJACENCY); 179 PRIM_CASE(TRIANGLESTRIP_ADJ, TRIANGLES_ADJACENCY); 180 default: 181 if (prim < D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST || 182 prim > D3D_PRIMITIVE_TOPOLOGY_32_CONTROL_POINT_PATCHLIST) 183 return PIPE_PRIM_POINTS; 184 if (patchSize) 185 *patchSize = 186 prim - D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + 1; 187 return NV50_PRIM_PATCHES; 188 } 189 } 190 191 #define IPM_CASE(n, a, b) \ 192 case SM4_INTERPOLATION_##n: return NV50_IR_INTERP_##a | NV50_IR_INTERP_##b 193 194 uint8_t 195 Converter::cvtInterpMode(enum sm4_interpolation mode) const 196 { 197 switch (mode) { 198 IPM_CASE(CONSTANT, FLAT, FLAT); 199 IPM_CASE(LINEAR, PERSPECTIVE, PERSPECTIVE); 200 IPM_CASE(LINEAR_CENTROID, PERSPECTIVE, CENTROID); 201 IPM_CASE(LINEAR_NOPERSPECTIVE, LINEAR, LINEAR); 202 IPM_CASE(LINEAR_NOPERSPECTIVE_CENTROID, LINEAR, CENTROID); 203 IPM_CASE(LINEAR_SAMPLE, PERSPECTIVE, OFFSET); 204 IPM_CASE(LINEAR_NOPERSPECTIVE_SAMPLE, LINEAR, OFFSET); 205 IPM_CASE(UNDEFINED, LINEAR, LINEAR); 206 default: 207 assert(!"invalid interpolation mode"); 208 return 0; 209 } 210 } 211 212 static void 213 setVaryingInterpMode(struct nv50_ir_varying *var, uint8_t mode) 214 { 215 switch (mode & NV50_IR_INTERP_MODE_MASK) { 216 case NV50_IR_INTERP_LINEAR: 217 var->linear = 1; 218 break; 219 case NV50_IR_INTERP_FLAT: 220 var->flat = 1; 221 break; 222 default: 223 break; 224 } 225 if (mode & NV50_IR_INTERP_CENTROID) 226 var->centroid = 1; 227 } 228 229 RoundMode 230 Converter::cvtRoundingMode(enum sm4_opcode op) const 231 { 232 switch (op) { 233 case SM4_OPCODE_ROUND_NE: return ROUND_NI; 234 case SM4_OPCODE_ROUND_NI: return ROUND_MI; 235 case SM4_OPCODE_ROUND_PI: return ROUND_PI; 236 case SM4_OPCODE_ROUND_Z: return ROUND_ZI; 237 default: 238 return ROUND_N; 239 } 240 } 241 242 CondCode 243 Converter::cvtCondCode(enum sm4_opcode op) const 244 { 245 switch (op) { 246 case SM4_OPCODE_EQ: 247 case SM4_OPCODE_DEQ: 248 case SM4_OPCODE_IEQ: return CC_EQ; 249 case SM4_OPCODE_GE: 250 case SM4_OPCODE_DGE: 251 case SM4_OPCODE_IGE: 252 case SM4_OPCODE_UGE: return CC_GE; 253 case SM4_OPCODE_LT: 254 case SM4_OPCODE_DLT: 255 case SM4_OPCODE_ILT: 256 case SM4_OPCODE_ULT: return CC_LT; 257 case SM4_OPCODE_NE: 258 case SM4_OPCODE_INE: 259 case SM4_OPCODE_DNE: return CC_NEU; 260 default: 261 return CC_ALWAYS; 262 } 263 } 264 265 DataType 266 Converter::inferSrcType(enum sm4_opcode op) const 267 { 268 switch (op) { 269 case SM4_OPCODE_IADD: 270 case SM4_OPCODE_IEQ: 271 case SM4_OPCODE_IGE: 272 case SM4_OPCODE_ILT: 273 case SM4_OPCODE_IMAD: 274 case SM4_OPCODE_IMAX: 275 case SM4_OPCODE_IMIN: 276 case SM4_OPCODE_IMUL: 277 case SM4_OPCODE_INE: 278 case SM4_OPCODE_INEG: 279 case SM4_OPCODE_ISHL: 280 case SM4_OPCODE_ISHR: 281 case SM4_OPCODE_ITOF: 282 case SM4_OPCODE_ATOMIC_IADD: 283 case SM4_OPCODE_ATOMIC_IMAX: 284 case SM4_OPCODE_ATOMIC_IMIN: 285 return TYPE_S32; 286 case SM4_OPCODE_AND: 287 case SM4_OPCODE_NOT: 288 case SM4_OPCODE_OR: 289 case SM4_OPCODE_UDIV: 290 case SM4_OPCODE_ULT: 291 case SM4_OPCODE_UGE: 292 case SM4_OPCODE_UMUL: 293 case SM4_OPCODE_UMAD: 294 case SM4_OPCODE_UMAX: 295 case SM4_OPCODE_UMIN: 296 case SM4_OPCODE_USHR: 297 case SM4_OPCODE_UTOF: 298 case SM4_OPCODE_XOR: 299 case SM4_OPCODE_UADDC: 300 case SM4_OPCODE_USUBB: 301 case SM4_OPCODE_ATOMIC_AND: 302 case SM4_OPCODE_ATOMIC_OR: 303 case SM4_OPCODE_ATOMIC_XOR: 304 case SM4_OPCODE_ATOMIC_UMAX: 305 case SM4_OPCODE_ATOMIC_UMIN: 306 return TYPE_U32; 307 case SM4_OPCODE_DADD: 308 case SM4_OPCODE_DMAX: 309 case SM4_OPCODE_DMIN: 310 case SM4_OPCODE_DMUL: 311 case SM4_OPCODE_DEQ: 312 case SM4_OPCODE_DGE: 313 case SM4_OPCODE_DLT: 314 case SM4_OPCODE_DNE: 315 case SM4_OPCODE_DMOV: 316 case SM4_OPCODE_DMOVC: 317 case SM4_OPCODE_DTOF: 318 return TYPE_F64; 319 case SM4_OPCODE_F16TOF32: 320 return TYPE_F16; 321 default: 322 return TYPE_F32; 323 } 324 } 325 326 DataType 327 Converter::inferDstType(enum sm4_opcode op) const 328 { 329 switch (op) { 330 case SM4_OPCODE_FTOI: 331 return TYPE_S32; 332 case SM4_OPCODE_FTOU: 333 case SM4_OPCODE_EQ: 334 case SM4_OPCODE_GE: 335 case SM4_OPCODE_LT: 336 case SM4_OPCODE_NE: 337 return TYPE_U32; 338 case SM4_OPCODE_FTOD: 339 return TYPE_F64; 340 case SM4_OPCODE_F32TOF16: 341 return TYPE_F16; 342 case SM4_OPCODE_ITOF: 343 case SM4_OPCODE_UTOF: 344 case SM4_OPCODE_DTOF: 345 return TYPE_F32; 346 default: 347 return inferSrcType(op); 348 } 349 } 350 351 operation 352 Converter::cvtOpcode(enum sm4_opcode op) const 353 { 354 switch (op) { 355 case SM4_OPCODE_ADD: return OP_ADD; 356 case SM4_OPCODE_AND: return OP_AND; 357 case SM4_OPCODE_BREAK: return OP_BREAK; 358 case SM4_OPCODE_BREAKC: return OP_BREAK; 359 case SM4_OPCODE_CALL: return OP_CALL; 360 case SM4_OPCODE_CALLC: return OP_CALL; 361 case SM4_OPCODE_CASE: return OP_NOP; 362 case SM4_OPCODE_CONTINUE: return OP_CONT; 363 case SM4_OPCODE_CONTINUEC: return OP_CONT; 364 case SM4_OPCODE_CUT: return OP_RESTART; 365 case SM4_OPCODE_DEFAULT: return OP_NOP; 366 case SM4_OPCODE_DERIV_RTX: return OP_DFDX; 367 case SM4_OPCODE_DERIV_RTY: return OP_DFDY; 368 case SM4_OPCODE_DISCARD: return OP_DISCARD; 369 case SM4_OPCODE_DIV: return OP_DIV; 370 case SM4_OPCODE_DP2: return OP_MAD; 371 case SM4_OPCODE_DP3: return OP_MAD; 372 case SM4_OPCODE_DP4: return OP_MAD; 373 case SM4_OPCODE_ELSE: return OP_BRA; 374 case SM4_OPCODE_EMIT: return OP_EMIT; 375 case SM4_OPCODE_EMITTHENCUT: return OP_EMIT; 376 case SM4_OPCODE_ENDIF: return OP_BRA; 377 case SM4_OPCODE_ENDLOOP: return OP_PREBREAK; 378 case SM4_OPCODE_ENDSWITCH: return OP_NOP; 379 case SM4_OPCODE_EQ: return OP_SET; 380 case SM4_OPCODE_EXP: return OP_EX2; 381 case SM4_OPCODE_FRC: return OP_CVT; 382 case SM4_OPCODE_FTOI: return OP_CVT; 383 case SM4_OPCODE_FTOU: return OP_CVT; 384 case SM4_OPCODE_GE: return OP_SET; 385 case SM4_OPCODE_IADD: return OP_ADD; 386 case SM4_OPCODE_IF: return OP_BRA; 387 case SM4_OPCODE_IEQ: return OP_SET; 388 case SM4_OPCODE_IGE: return OP_SET; 389 case SM4_OPCODE_ILT: return OP_SET; 390 case SM4_OPCODE_IMAD: return OP_MAD; 391 case SM4_OPCODE_IMAX: return OP_MAX; 392 case SM4_OPCODE_IMIN: return OP_MIN; 393 case SM4_OPCODE_IMUL: return OP_MUL; 394 case SM4_OPCODE_INE: return OP_SET; 395 case SM4_OPCODE_INEG: return OP_NEG; 396 case SM4_OPCODE_ISHL: return OP_SHL; 397 case SM4_OPCODE_ISHR: return OP_SHR; 398 case SM4_OPCODE_ITOF: return OP_CVT; 399 case SM4_OPCODE_LD: return OP_TXF; 400 case SM4_OPCODE_LD_MS: return OP_TXF; 401 case SM4_OPCODE_LOG: return OP_LG2; 402 case SM4_OPCODE_LOOP: return OP_PRECONT; 403 case SM4_OPCODE_LT: return OP_SET; 404 case SM4_OPCODE_MAD: return OP_MAD; 405 case SM4_OPCODE_MIN: return OP_MIN; 406 case SM4_OPCODE_MAX: return OP_MAX; 407 case SM4_OPCODE_MOV: return OP_MOV; 408 case SM4_OPCODE_MOVC: return OP_MOV; 409 case SM4_OPCODE_MUL: return OP_MUL; 410 case SM4_OPCODE_NE: return OP_SET; 411 case SM4_OPCODE_NOP: return OP_NOP; 412 case SM4_OPCODE_NOT: return OP_NOT; 413 case SM4_OPCODE_OR: return OP_OR; 414 case SM4_OPCODE_RESINFO: return OP_TXQ; 415 case SM4_OPCODE_RET: return OP_RET; 416 case SM4_OPCODE_RETC: return OP_RET; 417 case SM4_OPCODE_ROUND_NE: return OP_CVT; 418 case SM4_OPCODE_ROUND_NI: return OP_FLOOR; 419 case SM4_OPCODE_ROUND_PI: return OP_CEIL; 420 case SM4_OPCODE_ROUND_Z: return OP_TRUNC; 421 case SM4_OPCODE_RSQ: return OP_RSQ; 422 case SM4_OPCODE_SAMPLE: return OP_TEX; 423 case SM4_OPCODE_SAMPLE_C: return OP_TEX; 424 case SM4_OPCODE_SAMPLE_C_LZ: return OP_TEX; 425 case SM4_OPCODE_SAMPLE_L: return OP_TXL; 426 case SM4_OPCODE_SAMPLE_D: return OP_TXD; 427 case SM4_OPCODE_SAMPLE_B: return OP_TXB; 428 case SM4_OPCODE_SQRT: return OP_SQRT; 429 case SM4_OPCODE_SWITCH: return OP_NOP; 430 case SM4_OPCODE_SINCOS: return OP_PRESIN; 431 case SM4_OPCODE_UDIV: return OP_DIV; 432 case SM4_OPCODE_ULT: return OP_SET; 433 case SM4_OPCODE_UGE: return OP_SET; 434 case SM4_OPCODE_UMUL: return OP_MUL; 435 case SM4_OPCODE_UMAD: return OP_MAD; 436 case SM4_OPCODE_UMAX: return OP_MAX; 437 case SM4_OPCODE_UMIN: return OP_MIN; 438 case SM4_OPCODE_USHR: return OP_SHR; 439 case SM4_OPCODE_UTOF: return OP_CVT; 440 case SM4_OPCODE_XOR: return OP_XOR; 441 442 case SM4_OPCODE_GATHER4: return OP_TXG; 443 case SM4_OPCODE_SAMPLE_POS: return OP_PIXLD; 444 case SM4_OPCODE_SAMPLE_INFO: return OP_PIXLD; 445 case SM4_OPCODE_EMIT_STREAM: return OP_EMIT; 446 case SM4_OPCODE_CUT_STREAM: return OP_RESTART; 447 case SM4_OPCODE_EMITTHENCUT_STREAM: return OP_EMIT; 448 case SM4_OPCODE_INTERFACE_CALL: return OP_CALL; 449 case SM4_OPCODE_BUFINFO: return OP_TXQ; 450 case SM4_OPCODE_DERIV_RTX_COARSE: return OP_DFDX; 451 case SM4_OPCODE_DERIV_RTX_FINE: return OP_DFDX; 452 case SM4_OPCODE_DERIV_RTY_COARSE: return OP_DFDY; 453 case SM4_OPCODE_DERIV_RTY_FINE: return OP_DFDY; 454 case SM4_OPCODE_GATHER4_C: return OP_TXG; 455 case SM4_OPCODE_GATHER4_PO: return OP_TXG; 456 case SM4_OPCODE_GATHER4_PO_C: return OP_TXG; 457 458 case SM4_OPCODE_RCP: return OP_RCP; 459 case SM4_OPCODE_F32TOF16: return OP_CVT; 460 case SM4_OPCODE_F16TOF32: return OP_CVT; 461 case SM4_OPCODE_UADDC: return OP_ADD; 462 case SM4_OPCODE_USUBB: return OP_SUB; 463 case SM4_OPCODE_COUNTBITS: return OP_POPCNT; 464 465 case SM4_OPCODE_ATOMIC_AND: return OP_AND; 466 case SM4_OPCODE_ATOMIC_OR: return OP_OR; 467 case SM4_OPCODE_ATOMIC_XOR: return OP_XOR; 468 case SM4_OPCODE_ATOMIC_CMP_STORE: return OP_STORE; 469 case SM4_OPCODE_ATOMIC_IADD: return OP_ADD; 470 case SM4_OPCODE_ATOMIC_IMAX: return OP_MAX; 471 case SM4_OPCODE_ATOMIC_IMIN: return OP_MIN; 472 case SM4_OPCODE_ATOMIC_UMAX: return OP_MAX; 473 case SM4_OPCODE_ATOMIC_UMIN: return OP_MIN; 474 475 case SM4_OPCODE_SYNC: return OP_MEMBAR; 476 case SM4_OPCODE_DADD: return OP_ADD; 477 case SM4_OPCODE_DMAX: return OP_MAX; 478 case SM4_OPCODE_DMIN: return OP_MIN; 479 case SM4_OPCODE_DMUL: return OP_MUL; 480 case SM4_OPCODE_DEQ: return OP_SET; 481 case SM4_OPCODE_DGE: return OP_SET; 482 case SM4_OPCODE_DLT: return OP_SET; 483 case SM4_OPCODE_DNE: return OP_SET; 484 case SM4_OPCODE_DMOV: return OP_MOV; 485 case SM4_OPCODE_DMOVC: return OP_MOV; 486 case SM4_OPCODE_DTOF: return OP_CVT; 487 case SM4_OPCODE_FTOD: return OP_CVT; 488 489 default: 490 return OP_NOP; 491 } 492 } 493 494 unsigned int 495 Converter::getDstOpndCount(enum sm4_opcode opcode) const 496 { 497 switch (opcode) { 498 case SM4_OPCODE_SINCOS: 499 case SM4_OPCODE_UDIV: 500 case SM4_OPCODE_IMUL: 501 case SM4_OPCODE_UMUL: 502 return 2; 503 case SM4_OPCODE_BREAK: 504 case SM4_OPCODE_BREAKC: 505 case SM4_OPCODE_CALL: 506 case SM4_OPCODE_CALLC: 507 case SM4_OPCODE_CONTINUE: 508 case SM4_OPCODE_CONTINUEC: 509 case SM4_OPCODE_DISCARD: 510 case SM4_OPCODE_EMIT: 511 case SM4_OPCODE_EMIT_STREAM: 512 case SM4_OPCODE_CUT: 513 case SM4_OPCODE_CUT_STREAM: 514 case SM4_OPCODE_EMITTHENCUT: 515 case SM4_OPCODE_EMITTHENCUT_STREAM: 516 case SM4_OPCODE_IF: 517 case SM4_OPCODE_ELSE: 518 case SM4_OPCODE_ENDIF: 519 case SM4_OPCODE_LOOP: 520 case SM4_OPCODE_ENDLOOP: 521 case SM4_OPCODE_RET: 522 case SM4_OPCODE_RETC: 523 case SM4_OPCODE_SYNC: 524 case SM4_OPCODE_SWITCH: 525 case SM4_OPCODE_CASE: 526 case SM4_OPCODE_HS_DECLS: 527 case SM4_OPCODE_HS_CONTROL_POINT_PHASE: 528 case SM4_OPCODE_HS_FORK_PHASE: 529 case SM4_OPCODE_HS_JOIN_PHASE: 530 return 0; 531 default: 532 return 1; 533 } 534 } 535 536 #define TARG_CASE_1(a, b) case SM4_TARGET_##a: return TEX_TARGET_##b; 537 #define TARG_CASE_2(a, b) case SM4_TARGET_##a: \ 538 return dc ? TEX_TARGET_##b##_SHADOW : TEX_TARGET_##b 539 540 TexTarget 541 Converter::cvtTexTarget(enum sm4_target targ, 542 enum sm4_opcode op, operation *opr) const 543 { 544 bool dc = (op == SM4_OPCODE_SAMPLE_C || 545 op == SM4_OPCODE_SAMPLE_C_LZ || 546 op == SM4_OPCODE_GATHER4_C || 547 op == SM4_OPCODE_GATHER4_PO_C); 548 549 if (opr) { 550 switch (targ) { 551 case SM4_TARGET_RAW_BUFFER: *opr = OP_LOAD; break; 552 case SM4_TARGET_STRUCTURED_BUFFER: *opr = OP_SULD; break; 553 default: 554 *opr = OP_TEX; 555 break; 556 } 557 } 558 559 switch (targ) { 560 TARG_CASE_1(UNKNOWN, 2D); 561 TARG_CASE_2(TEXTURE1D, 1D); 562 TARG_CASE_2(TEXTURE2D, 2D); 563 TARG_CASE_1(TEXTURE2DMS, 2D_MS); 564 TARG_CASE_1(TEXTURE3D, 3D); 565 TARG_CASE_2(TEXTURECUBE, CUBE); 566 TARG_CASE_2(TEXTURE1DARRAY, 1D_ARRAY); 567 TARG_CASE_2(TEXTURE2DARRAY, 2D_ARRAY); 568 TARG_CASE_1(TEXTURE2DMSARRAY, 2D_MS_ARRAY); 569 TARG_CASE_2(TEXTURECUBEARRAY, CUBE_ARRAY); 570 TARG_CASE_1(BUFFER, BUFFER); 571 TARG_CASE_1(RAW_BUFFER, BUFFER); 572 TARG_CASE_1(STRUCTURED_BUFFER, BUFFER); 573 default: 574 assert(!"invalid SM4 texture target"); 575 return dc ? TEX_TARGET_2D_SHADOW : TEX_TARGET_2D; 576 } 577 } 578 579 static inline uint32_t 580 getSVIndex(enum sm4_sv sv) 581 { 582 switch (sv) { 583 case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: return 0; 584 case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: return 1; 585 case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: return 2; 586 case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: return 3; 587 588 case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR: return 4; 589 case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR: return 5; 590 591 case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: return 0; 592 case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: return 1; 593 case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: return 2; 594 595 case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR: return 4; 596 597 case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR: return 0; 598 599 case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR: return 4; 600 601 default: 602 return 0; 603 } 604 } 605 606 SVSemantic 607 Converter::cvtSemantic(enum sm4_sv sv, uint8_t &idx) const 608 { 609 idx = 0; 610 611 switch (sv) { 612 case SM4_SV_UNDEFINED: return SV_UNDEFINED; 613 case SM4_SV_POSITION: return SV_POSITION; 614 case SM4_SV_CLIP_DISTANCE: return SV_CLIP_DISTANCE; 615 case SM4_SV_CULL_DISTANCE: return SV_CLIP_DISTANCE; // XXX: distinction 616 case SM4_SV_RENDER_TARGET_ARRAY_INDEX: return SV_LAYER; 617 case SM4_SV_VIEWPORT_ARRAY_INDEX: return SV_VIEWPORT_INDEX; 618 case SM4_SV_VERTEX_ID: return SV_VERTEX_ID; 619 case SM4_SV_PRIMITIVE_ID: return SV_PRIMITIVE_ID; 620 case SM4_SV_INSTANCE_ID: return SV_INSTANCE_ID; 621 case SM4_SV_IS_FRONT_FACE: return SV_FACE; 622 case SM4_SV_SAMPLE_INDEX: return SV_SAMPLE_INDEX; 623 624 case SM4_SV_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: 625 case SM4_SV_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: 626 case SM4_SV_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: 627 case SM4_SV_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: 628 case SM4_SV_FINAL_QUAD_U_INSIDE_TESSFACTOR: 629 case SM4_SV_FINAL_QUAD_V_INSIDE_TESSFACTOR: 630 case SM4_SV_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: 631 case SM4_SV_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: 632 case SM4_SV_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: 633 case SM4_SV_FINAL_TRI_INSIDE_TESSFACTOR: 634 case SM4_SV_FINAL_LINE_DETAIL_TESSFACTOR: 635 case SM4_SV_FINAL_LINE_DENSITY_TESSFACTOR: 636 idx = getSVIndex(sv); 637 return SV_TESS_FACTOR; 638 639 default: 640 assert(!"invalid SM4 system value"); 641 return SV_UNDEFINED; 642 } 643 } 644 645 unsigned 646 Converter::tgsiSemantic(SVSemantic sv, int index) 647 { 648 switch (sv) { 649 case SV_POSITION: return TGSI_SEMANTIC_POSITION; 650 case SV_FACE: return TGSI_SEMANTIC_FACE; 651 case SV_LAYER: return NV50_SEMANTIC_LAYER; 652 case SV_VIEWPORT_INDEX: return NV50_SEMANTIC_VIEWPORTINDEX; 653 case SV_POINT_SIZE: return TGSI_SEMANTIC_PSIZE; 654 case SV_CLIP_DISTANCE: return NV50_SEMANTIC_CLIPDISTANCE; 655 case SV_VERTEX_ID: return TGSI_SEMANTIC_VERTEXID; 656 case SV_INSTANCE_ID: return TGSI_SEMANTIC_INSTANCEID; 657 case SV_PRIMITIVE_ID: return TGSI_SEMANTIC_PRIMID; 658 case SV_TESS_FACTOR: return NV50_SEMANTIC_TESSFACTOR; 659 case SV_TESS_COORD: return NV50_SEMANTIC_TESSCOORD; 660 case SV_INVOCATION_ID: return NV50_SEMANTIC_INVOCATIONID; 661 default: 662 return TGSI_SEMANTIC_GENERIC; 663 } 664 } 665 666 void 667 Converter::recordSV(unsigned sn, unsigned si, unsigned mask, bool input) 668 { 669 unsigned int i; 670 for (i = 0; i < info.numSysVals; ++i) 671 if (info.sv[i].sn == sn && 672 info.sv[i].si == si) 673 return; 674 info.numSysVals = i + 1; 675 info.sv[i].sn = sn; 676 info.sv[i].si = si; 677 info.sv[i].mask = mask; 678 info.sv[i].input = input ? 1 : 0; 679 } 680 681 bool 682 Converter::parseSignature() 683 { 684 struct nv50_ir_varying *patch; 685 unsigned int i, r, n; 686 687 info.numInputs = 0; 688 info.numOutputs = 0; 689 info.numPatchConstants = 0; 690 691 for (n = 0, i = 0; i < sm4.num_params_in; ++i) { 692 r = sm4.params_in[i].Register; 693 694 info.in[r].mask |= sm4.params_in[i].ReadWriteMask; 695 // mask might be uninitialized ... 696 if (!sm4.params_in[i].ReadWriteMask) 697 info.in[r].mask = 0xf; 698 info.in[r].id = r; 699 if (info.in[r].regular) // already assigned semantic name/index 700 continue; 701 info.in[r].regular = 1; 702 info.in[r].patch = 0; 703 704 info.numInputs = MAX2(info.numInputs, r + 1); 705 706 switch (sm4.params_in[i].SystemValueType) { 707 case D3D_NAME_UNDEFINED: 708 info.in[r].sn = TGSI_SEMANTIC_GENERIC; 709 info.in[r].si = n++; 710 break; 711 case D3D_NAME_POSITION: 712 info.in[r].sn = TGSI_SEMANTIC_POSITION; 713 break; 714 case D3D_NAME_VERTEX_ID: 715 info.in[r].sn = TGSI_SEMANTIC_VERTEXID; 716 break; 717 case D3D_NAME_PRIMITIVE_ID: 718 info.in[r].sn = TGSI_SEMANTIC_PRIMID; 719 // no corresponding output 720 recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true); 721 break; 722 case D3D_NAME_INSTANCE_ID: 723 info.in[r].sn = TGSI_SEMANTIC_INSTANCEID; 724 break; 725 case D3D_NAME_IS_FRONT_FACE: 726 info.in[r].sn = TGSI_SEMANTIC_FACE; 727 // no corresponding output 728 recordSV(TGSI_SEMANTIC_FACE, 0, 1, true); 729 break; 730 default: 731 assert(!"invalid/unsupported input linkage semantic"); 732 break; 733 } 734 } 735 736 for (n = 0, i = 0; i < sm4.num_params_out; ++i) { 737 r = sm4.params_out[i].Register; 738 739 info.out[r].mask |= ~sm4.params_out[i].ReadWriteMask; 740 info.out[r].id = r; 741 if (info.out[r].regular) // already assigned semantic name/index 742 continue; 743 info.out[r].regular = 1; 744 info.out[r].patch = 0; 745 746 info.numOutputs = MAX2(info.numOutputs, r + 1); 747 748 switch (sm4.params_out[i].SystemValueType) { 749 case D3D_NAME_UNDEFINED: 750 if (prog->getType() == Program::TYPE_FRAGMENT) { 751 info.out[r].sn = TGSI_SEMANTIC_COLOR; 752 info.out[r].si = info.prop.fp.numColourResults++; 753 } else { 754 info.out[r].sn = TGSI_SEMANTIC_GENERIC; 755 info.out[r].si = n++; 756 } 757 break; 758 case D3D_NAME_POSITION: 759 case D3D_NAME_DEPTH: 760 case D3D_NAME_DEPTH_GREATER_EQUAL: 761 case D3D_NAME_DEPTH_LESS_EQUAL: 762 info.out[r].sn = TGSI_SEMANTIC_POSITION; 763 info.io.fragDepth = r; 764 break; 765 case D3D_NAME_CULL_DISTANCE: 766 case D3D_NAME_CLIP_DISTANCE: 767 info.out[r].sn = NV50_SEMANTIC_CLIPDISTANCE; 768 info.out[r].si = sm4.params_out[i].SemanticIndex; 769 break; 770 case D3D_NAME_RENDER_TARGET_ARRAY_INDEX: 771 info.out[r].sn = NV50_SEMANTIC_LAYER; 772 break; 773 case D3D_NAME_VIEWPORT_ARRAY_INDEX: 774 info.out[r].sn = NV50_SEMANTIC_VIEWPORTINDEX; 775 break; 776 case D3D_NAME_PRIMITIVE_ID: 777 info.out[r].sn = TGSI_SEMANTIC_PRIMID; 778 break; 779 case D3D_NAME_TARGET: 780 info.out[r].sn = TGSI_SEMANTIC_COLOR; 781 info.out[r].si = sm4.params_out[i].SemanticIndex; 782 break; 783 case D3D_NAME_COVERAGE: 784 info.out[r].sn = NV50_SEMANTIC_SAMPLEMASK; 785 info.io.sampleMask = r; 786 break; 787 case D3D_NAME_SAMPLE_INDEX: 788 default: 789 assert(!"invalid/unsupported output linkage semantic"); 790 break; 791 } 792 } 793 794 if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) 795 patch = &info.in[info.numInputs]; 796 else 797 patch = &info.out[info.numOutputs]; 798 799 for (n = 0, i = 0; i < sm4.num_params_patch; ++i) { 800 r = sm4.params_patch[i].Register; 801 802 patch[r].mask |= sm4.params_patch[i].Mask; 803 patch[r].id = r; 804 if (patch[r].regular) // already visited 805 continue; 806 patch[r].regular = 1; 807 patch[r].patch = 1; 808 809 info.numPatchConstants = MAX2(info.numPatchConstants, r + 1); 810 811 switch (sm4.params_patch[i].SystemValueType) { 812 case D3D_NAME_UNDEFINED: 813 patch[r].sn = TGSI_SEMANTIC_GENERIC; 814 patch[r].si = n++; 815 break; 816 case D3D_NAME_FINAL_QUAD_EDGE_TESSFACTOR: 817 case D3D_NAME_FINAL_TRI_EDGE_TESSFACTOR: 818 case D3D_NAME_FINAL_LINE_DETAIL_TESSFACTOR: 819 patch[r].sn = NV50_SEMANTIC_TESSFACTOR; 820 patch[r].si = sm4.params_patch[i].SemanticIndex; 821 break; 822 case D3D_NAME_FINAL_QUAD_INSIDE_TESSFACTOR: 823 case D3D_NAME_FINAL_TRI_INSIDE_TESSFACTOR: 824 case D3D_NAME_FINAL_LINE_DENSITY_TESSFACTOR: 825 patch[r].sn = NV50_SEMANTIC_TESSFACTOR; 826 patch[r].si = sm4.params_patch[i].SemanticIndex + 4; 827 break; 828 default: 829 assert(!"invalid patch-constant linkage semantic"); 830 break; 831 } 832 } 833 if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) 834 info.numInputs += info.numPatchConstants; 835 else 836 info.numOutputs += info.numPatchConstants; 837 838 return true; 839 } 840 841 bool 842 Converter::inspectDeclaration(const sm4_dcl& dcl) 843 { 844 int idx = -1; 845 enum sm4_interpolation ipa_mode; 846 847 if (dcl.op.get() && dcl.op->is_index_simple(0)) 848 idx = dcl.op->indices[0].disp; 849 850 switch (dcl.opcode) { 851 case SM4_OPCODE_DCL_SAMPLER: 852 assert(idx >= 0); 853 shadow[idx] = dcl.dcl_sampler.shadow; 854 break; 855 case SM4_OPCODE_DCL_RESOURCE: 856 { 857 enum sm4_target targ = (enum sm4_target)dcl.dcl_resource.target; 858 859 assert(idx >= 0 && idx < NV50_IR_MAX_RESOURCES); 860 resourceType[idx][0] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE, NULL); 861 resourceType[idx][1] = cvtTexTarget(targ, SM4_OPCODE_SAMPLE_C, NULL); 862 } 863 break; 864 case SM4_OPCODE_DCL_CONSTANT_BUFFER: 865 // nothing to do 866 break; 867 case SM4_OPCODE_CUSTOMDATA: 868 info.immd.bufSize = dcl.num * 4; 869 info.immd.buf = (uint32_t *)MALLOC(info.immd.bufSize); 870 memcpy(info.immd.buf, dcl.data, info.immd.bufSize); 871 break; 872 case SM4_OPCODE_DCL_INDEX_RANGE: 873 // XXX: ? 874 break; 875 case SM4_OPCODE_DCL_INPUT_PS_SGV: 876 case SM4_OPCODE_DCL_INPUT_PS_SIV: 877 case SM4_OPCODE_DCL_INPUT_PS: 878 { 879 assert(idx >= 0 && idx < info.numInputs); 880 ipa_mode = (enum sm4_interpolation)dcl.dcl_input_ps.interpolation; 881 interpMode[idx] = cvtInterpMode(ipa_mode); 882 setVaryingInterpMode(&info.in[idx], interpMode[idx]); 883 } 884 break; 885 case SM4_OPCODE_DCL_INPUT_SGV: 886 case SM4_OPCODE_DCL_INPUT_SIV: 887 case SM4_OPCODE_DCL_INPUT: 888 if (dcl.op->file == SM4_FILE_INPUT_DOMAIN_POINT) { 889 idx = info.numInputs++; 890 info.in[idx].sn = NV50_SEMANTIC_TESSCOORD; 891 info.in[idx].mask = dcl.op->mask; 892 } 893 // rest handled in parseSignature 894 break; 895 case SM4_OPCODE_DCL_OUTPUT_SGV: 896 case SM4_OPCODE_DCL_OUTPUT_SIV: 897 switch (dcl.sv) { 898 case SM4_SV_POSITION: 899 assert(prog->getType() != Program::TYPE_FRAGMENT); 900 break; 901 case SM4_SV_CULL_DISTANCE: // XXX: order ? 902 info.io.cullDistanceMask |= 1 << info.io.clipDistanceMask; 903 // fall through 904 case SM4_SV_CLIP_DISTANCE: 905 info.io.clipDistanceMask++; // abuse as count 906 break; 907 default: 908 break; 909 } 910 switch (dcl.op->file) { 911 case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL: 912 case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL: 913 case SM4_FILE_OUTPUT_DEPTH: 914 if (info.io.fragDepth < 0xff) 915 break; 916 idx = info.io.fragDepth = info.numOutputs++; 917 info.out[idx].sn = TGSI_SEMANTIC_POSITION; 918 break; 919 case SM4_FILE_OUTPUT_COVERAGE_MASK: 920 if (info.io.sampleMask < 0xff) 921 break; 922 idx = info.io.sampleMask = info.numOutputs++; 923 info.out[idx].sn = NV50_SEMANTIC_SAMPLEMASK; 924 break; 925 default: 926 break; 927 } 928 break; 929 case SM4_OPCODE_DCL_OUTPUT: 930 // handled in parseSignature 931 break; 932 case SM4_OPCODE_DCL_TEMPS: 933 nrRegVals += dcl.num; 934 break; 935 case SM4_OPCODE_DCL_INDEXABLE_TEMP: 936 nrArrays++; 937 break; 938 case SM4_OPCODE_DCL_GLOBAL_FLAGS: 939 if (prog->getType() == Program::TYPE_FRAGMENT) 940 info.prop.fp.earlyFragTests = dcl.dcl_global_flags.early_depth_stencil; 941 break; 942 943 case SM4_OPCODE_DCL_FUNCTION_BODY: 944 break; 945 case SM4_OPCODE_DCL_FUNCTION_TABLE: 946 break; 947 case SM4_OPCODE_DCL_INTERFACE: 948 break; 949 950 // GP 951 case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: 952 info.prop.gp.outputPrim = g3dPrim( 953 dcl.dcl_gs_output_primitive_topology.primitive_topology); 954 break; 955 case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE: 956 info.prop.gp.inputPrim = g3dPrim(dcl.dcl_gs_input_primitive.primitive); 957 break; 958 case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: 959 info.prop.gp.maxVertices = dcl.num; 960 break; 961 case SM4_OPCODE_DCL_GS_INSTANCE_COUNT: 962 info.prop.gp.instanceCount = dcl.num; 963 break; 964 case SM4_OPCODE_DCL_STREAM: 965 break; 966 967 // TCP/TEP 968 case SM4_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: 969 info.prop.tp.inputPatchSize = 970 dcl.dcl_input_control_point_count.control_points; 971 break; 972 case SM4_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: 973 info.prop.tp.outputPatchSize = 974 dcl.dcl_output_control_point_count.control_points; 975 break; 976 case SM4_OPCODE_DCL_TESS_DOMAIN: 977 switch (dcl.dcl_tess_domain.domain) { 978 case D3D_TESSELLATOR_DOMAIN_ISOLINE: 979 info.prop.tp.domain = PIPE_PRIM_LINES; 980 break; 981 case D3D_TESSELLATOR_DOMAIN_TRI: 982 info.prop.tp.domain = PIPE_PRIM_TRIANGLES; 983 break; 984 case D3D_TESSELLATOR_DOMAIN_QUAD: 985 info.prop.tp.domain = PIPE_PRIM_QUADS; 986 break; 987 case D3D_TESSELLATOR_DOMAIN_UNDEFINED: 988 default: 989 info.prop.tp.domain = PIPE_PRIM_MAX; 990 break; 991 } 992 break; 993 case SM4_OPCODE_DCL_TESS_PARTITIONING: 994 switch (dcl.dcl_tess_partitioning.partitioning) { 995 case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: 996 info.prop.tp.partitioning = NV50_TESS_PART_FRACT_ODD; 997 break; 998 case D3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: 999 info.prop.tp.partitioning = NV50_TESS_PART_FRACT_EVEN; 1000 break; 1001 case D3D_TESSELLATOR_PARTITIONING_POW2: 1002 info.prop.tp.partitioning = NV50_TESS_PART_POW2; 1003 break; 1004 case D3D_TESSELLATOR_PARTITIONING_INTEGER: 1005 case D3D_TESSELLATOR_PARTITIONING_UNDEFINED: 1006 default: 1007 info.prop.tp.partitioning = NV50_TESS_PART_INTEGER; 1008 break; 1009 } 1010 break; 1011 case SM4_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: 1012 switch (dcl.dcl_tess_output_primitive.primitive) { 1013 case D3D_TESSELLATOR_OUTPUT_LINE: 1014 info.prop.tp.outputPrim = PIPE_PRIM_LINES; 1015 break; 1016 case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CW: 1017 info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; 1018 info.prop.tp.winding = +1; 1019 break; 1020 case D3D_TESSELLATOR_OUTPUT_TRIANGLE_CCW: 1021 info.prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; 1022 info.prop.tp.winding = -1; 1023 break; 1024 case D3D_TESSELLATOR_OUTPUT_POINT: 1025 info.prop.tp.outputPrim = PIPE_PRIM_POINTS; 1026 break; 1027 case D3D_TESSELLATOR_OUTPUT_UNDEFINED: 1028 default: 1029 info.prop.tp.outputPrim = PIPE_PRIM_MAX; 1030 break; 1031 } 1032 break; 1033 1034 case SM4_OPCODE_HS_FORK_PHASE: 1035 ++subPhaseCnt[0]; 1036 phase = 1; 1037 break; 1038 case SM4_OPCODE_HS_JOIN_PHASE: 1039 phase = 2; 1040 ++subPhaseCnt[1]; 1041 break; 1042 case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: 1043 case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: 1044 case SM4_OPCODE_DCL_HS_MAX_TESSFACTOR: 1045 break; 1046 1047 // weird stuff 1048 case SM4_OPCODE_DCL_THREAD_GROUP: 1049 case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: 1050 case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: 1051 case SM4_OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: 1052 case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: 1053 case SM4_OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: 1054 case SM4_OPCODE_DCL_RESOURCE_RAW: 1055 case SM4_OPCODE_DCL_RESOURCE_STRUCTURED: 1056 ERROR("unhandled declaration\n"); 1057 abort(); 1058 return false; 1059 1060 default: 1061 assert(!"invalid SM4 declaration"); 1062 return false; 1063 } 1064 return true; 1065 } 1066 1067 void 1068 Converter::allocateValues() 1069 { 1070 lData = new DataArray[nrArrays]; 1071 1072 for (unsigned int i = 0; i < nrArrays; ++i) 1073 lData[i].setParent(this); 1074 1075 tData32.setup(0, nrRegVals, 4, 4, FILE_GPR); 1076 tData64.setup(0, nrRegVals, 2, 8, FILE_GPR); 1077 1078 if (prog->getType() == Program::TYPE_FRAGMENT) 1079 oData.setup(0, info.numOutputs, 4, 4, FILE_GPR); 1080 } 1081 1082 bool Converter::handleDeclaration(const sm4_dcl& dcl) 1083 { 1084 switch (dcl.opcode) { 1085 case SM4_OPCODE_DCL_INDEXABLE_TEMP: 1086 lData[nrArrays++].setup(arrayVol, 1087 dcl.indexable_temp.num, dcl.indexable_temp.comps, 1088 4, FILE_MEMORY_LOCAL); 1089 arrayVol += dcl.indexable_temp.num * dcl.indexable_temp.comps * 4; 1090 break; 1091 case SM4_OPCODE_HS_FORK_PHASE: 1092 if (subPhaseCnt[0]) 1093 phaseInstCnt[0][subPhaseCnt[0]] = phaseInstCnt[0][subPhaseCnt[0] - 1]; 1094 ++subPhaseCnt[0]; 1095 break; 1096 case SM4_OPCODE_HS_JOIN_PHASE: 1097 if (subPhaseCnt[1]) 1098 phaseInstCnt[1][subPhaseCnt[1]] = phaseInstCnt[1][subPhaseCnt[1] - 1]; 1099 ++subPhaseCnt[1]; 1100 break; 1101 case SM4_OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: 1102 phaseInstCnt[0][subPhaseCnt[0] - 1] = dcl.num; 1103 break; 1104 case SM4_OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: 1105 phaseInstCnt[1][subPhaseCnt[1] - 1] = dcl.num; 1106 break; 1107 1108 default: 1109 break; // already handled in inspection 1110 } 1111 1112 return true; 1113 } 1114 1115 Symbol * 1116 Converter::iSym(int i, int c) 1117 { 1118 if (info.in[i].regular) { 1119 return mkSymbol(FILE_SHADER_INPUT, 0, sTy, info.in[i].slot[c] * 4); 1120 } else { 1121 return mkSysVal(tgsi::irSemantic(info.in[i].sn), info.in[i].si); 1122 } 1123 } 1124 1125 Symbol * 1126 Converter::oSym(int i, int c) 1127 { 1128 if (info.out[i].regular) { 1129 return mkSymbol(FILE_SHADER_OUTPUT, 0, dTy, info.out[i].slot[c] * 4); 1130 } else { 1131 return mkSysVal(tgsi::irSemantic(info.out[i].sn), info.out[i].si); 1132 } 1133 } 1134 1135 Value * 1136 Converter::getSrcPtr(int s, int dim, int shl) 1137 { 1138 if (srcPtr[s][dim]) 1139 return srcPtr[s][dim]; 1140 1141 sm4_op *op = insn->ops[s + nDstOpnds]->indices[dim].reg.get(); 1142 1143 if (!op) 1144 return NULL; 1145 1146 Value *index = src(*op, 0, s); 1147 1148 srcPtr[s][dim] = index; 1149 if (shl) 1150 srcPtr[s][dim] = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl)); 1151 return srcPtr[s][dim]; 1152 } 1153 1154 Value * 1155 Converter::getDstPtr(int d, int dim, int shl) 1156 { 1157 assert(d == 0); 1158 if (dstPtr[dim]) 1159 return dstPtr[dim]; 1160 1161 sm4_op *op = insn->ops[d]->indices[dim].reg.get(); 1162 if (!op) 1163 return NULL; 1164 1165 Value *index = src(*op, 0, d); 1166 if (shl) 1167 index = mkOp2v(OP_SHL, TYPE_U32, getSSA(), index, mkImm(shl)); 1168 1169 return (dstPtr[dim] = index); 1170 } 1171 1172 Value * 1173 Converter::getVtxPtr(int s) 1174 { 1175 assert(s < 3); 1176 if (vtxBase[s]) 1177 return vtxBase[s]; 1178 1179 sm4_op *op = insn->ops[s + nDstOpnds].get(); 1180 if (!op) 1181 return NULL; 1182 int idx = op->indices[0].disp; 1183 1184 vtxBase[s] = getSrcPtr(s, 0, 0); 1185 vtxBase[s] = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(), mkImm(idx), vtxBase[s]); 1186 return vtxBase[s]; 1187 } 1188 1189 Value * 1190 Converter::src(int i, int c) 1191 { 1192 return src(*insn->ops[i + nDstOpnds], c, i); 1193 } 1194 1195 Value * 1196 Converter::dst(int i, int c) 1197 { 1198 return dst(*insn->ops[i], c, i); 1199 } 1200 1201 void 1202 Converter::saveDst(int i, int c, Value *value) 1203 { 1204 if (insn->insn.sat) 1205 mkOp1(OP_SAT, dTy, value, value); 1206 return saveDst(*insn->ops[i], c, value, i); 1207 } 1208 1209 Value * 1210 Converter::interpolate(const sm4_op& op, int c, int i) 1211 { 1212 int idx = op.indices[0].disp; 1213 int swz = op.swizzle[c]; 1214 operation opr = 1215 (info.in[idx].linear || info.in[idx].flat) ? OP_LINTERP : OP_PINTERP; 1216 1217 Value *ptr = getSrcPtr(i, 0, 4); 1218 1219 Instruction *insn = new_Instruction(func, opr, TYPE_F32); 1220 1221 insn->setDef(0, getScratch()); 1222 insn->setSrc(0, iSym(idx, swz)); 1223 if (opr == OP_PINTERP) 1224 insn->setSrc(1, fragCoord[3]); 1225 if (ptr) 1226 insn->setIndirect(0, 0, ptr); 1227 1228 insn->setInterpolate(interpMode[idx]); 1229 1230 bb->insertTail(insn); 1231 return insn->getDef(0); 1232 } 1233 1234 Value * 1235 Converter::src(const sm4_op& op, int c, int s) 1236 { 1237 const int size = typeSizeof(sTy); 1238 1239 Instruction *ld; 1240 Value *res, *ptr, *vtx; 1241 int idx, dim, off; 1242 const int swz = op.swizzle[c]; 1243 1244 switch (op.file) { 1245 case SM4_FILE_IMMEDIATE32: 1246 res = loadImm(NULL, (uint32_t)op.imm_values[swz].u32); 1247 break; 1248 case SM4_FILE_IMMEDIATE64: 1249 assert(c < 2); 1250 res = loadImm(NULL, op.imm_values[swz].u64); 1251 break; 1252 case SM4_FILE_TEMP: 1253 assert(op.is_index_simple(0)); 1254 idx = op.indices[0].disp; 1255 if (size == 8) 1256 res = tData64.load(idx, swz, NULL); 1257 else 1258 res = tData32.load(idx, swz, NULL); 1259 break; 1260 case SM4_FILE_INPUT: 1261 case SM4_FILE_INPUT_CONTROL_POINT: 1262 case SM4_FILE_INPUT_PATCH_CONSTANT: 1263 if (prog->getType() == Program::TYPE_FRAGMENT) 1264 return interpolate(op, c, s); 1265 1266 idx = 0; 1267 if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT) 1268 idx = info.numInputs - info.numPatchConstants; 1269 1270 if (op.num_indices == 2) { 1271 vtx = getVtxPtr(s); 1272 ptr = getSrcPtr(s, 1, 4); 1273 idx += op.indices[1].disp; 1274 res = getSSA(); 1275 ld = mkOp1(OP_VFETCH, TYPE_U32, res, iSym(idx, swz)); 1276 ld->setIndirect(0, 0, ptr); 1277 ld->setIndirect(0, 1, vtx); 1278 } else { 1279 idx += op.indices[0].disp; 1280 res = mkLoad(sTy, iSym(idx, swz), getSrcPtr(s, 0, 4)); 1281 } 1282 if (op.file == SM4_FILE_INPUT_PATCH_CONSTANT) 1283 res->defs->getInsn()->perPatch = 1; 1284 break; 1285 case SM4_FILE_CONSTANT_BUFFER: 1286 assert(op.num_indices == 2); 1287 assert(op.is_index_simple(0)); 1288 1289 ptr = getSrcPtr(s, 1, 4); 1290 dim = op.indices[0].disp; 1291 off = (op.indices[1].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4); 1292 1293 res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, dim, sTy, off), ptr); 1294 break; 1295 case SM4_FILE_IMMEDIATE_CONSTANT_BUFFER: 1296 ptr = getSrcPtr(s, 0, 4); 1297 off = (op.indices[0].disp * 4 + swz) * 4; 1298 res = mkLoad(sTy, mkSymbol(FILE_MEMORY_CONST, 14, sTy, off), ptr); 1299 break; 1300 case SM4_FILE_INDEXABLE_TEMP: 1301 { 1302 assert(op.is_index_simple(0)); 1303 int a = op.indices[0].disp; 1304 idx = op.indices[1].disp; 1305 res = lData[a].load(idx, swz, getSrcPtr(s, 1, 4)); 1306 } 1307 break; 1308 case SM4_FILE_INPUT_PRIMITIVEID: 1309 recordSV(TGSI_SEMANTIC_PRIMID, 0, 1, true); 1310 res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_PRIMITIVE_ID, 0)); 1311 break; 1312 case SM4_FILE_INPUT_GS_INSTANCE_ID: 1313 case SM4_FILE_OUTPUT_CONTROL_POINT_ID: 1314 recordSV(NV50_SEMANTIC_INVOCATIONID, 0, 1, true); 1315 res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_INVOCATION_ID, 0)); 1316 break; 1317 case SM4_FILE_CYCLE_COUNTER: 1318 res = 1319 mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CLOCK, swz ? 1 : 0)); 1320 break; 1321 case SM4_FILE_INPUT_FORK_INSTANCE_ID: 1322 case SM4_FILE_INPUT_JOIN_INSTANCE_ID: 1323 { 1324 phaseInstanceUsed = true; 1325 if (unrollPhase) 1326 return loadImm(NULL, phaseInstance); 1327 const unsigned int cnt = phaseInstCnt[phase - 1][subPhase]; 1328 res = getScratch(); 1329 res = mkOp1v(OP_RDSV, TYPE_U32, res, mkSysVal(SV_INVOCATION_ID, 0)); 1330 res = mkOp2v(OP_MIN, TYPE_U32, res, res, loadImm(NULL, cnt - 1)); 1331 } 1332 break; 1333 case SM4_FILE_INPUT_DOMAIN_POINT: 1334 assert(swz < 3); 1335 res = domainPt[swz]; 1336 break; 1337 case SM4_FILE_THREAD_GROUP_SHARED_MEMORY: 1338 off = (op.indices[0].disp * 4 + swz) * (sTy == TYPE_F64 ? 8 : 4); 1339 ptr = getSrcPtr(s, 0, 4); 1340 res = mkLoad(sTy, mkSymbol(FILE_MEMORY_SHARED, 0, sTy, off), ptr); 1341 break; 1342 case SM4_FILE_RESOURCE: 1343 case SM4_FILE_SAMPLER: 1344 case SM4_FILE_UNORDERED_ACCESS_VIEW: 1345 return NULL; 1346 case SM4_FILE_INPUT_THREAD_ID: 1347 res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_TID, swz)); 1348 break; 1349 case SM4_FILE_INPUT_THREAD_GROUP_ID: 1350 res = mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_CTAID, swz)); 1351 break; 1352 case SM4_FILE_FUNCTION_INPUT: 1353 case SM4_FILE_INPUT_THREAD_ID_IN_GROUP: 1354 assert(!"unhandled source file"); 1355 return NULL; 1356 default: 1357 assert(!"invalid source file"); 1358 return NULL; 1359 } 1360 1361 if (op.abs) 1362 res = mkOp1v(OP_ABS, sTy, getSSA(res->reg.size), res); 1363 if (op.neg) 1364 res = mkOp1v(OP_NEG, sTy, getSSA(res->reg.size), res); 1365 return res; 1366 } 1367 1368 Value * 1369 Converter::dst(const sm4_op &op, int c, int i) 1370 { 1371 switch (op.file) { 1372 case SM4_FILE_TEMP: 1373 return tData32.acquire(op.indices[0].disp, c); 1374 case SM4_FILE_INDEXABLE_TEMP: 1375 return getScratch(); 1376 case SM4_FILE_OUTPUT: 1377 if (prog->getType() == Program::TYPE_FRAGMENT) 1378 return oData.acquire(op.indices[0].disp, c); 1379 return getScratch(); 1380 case SM4_FILE_NULL: 1381 return NULL; 1382 case SM4_FILE_OUTPUT_DEPTH: 1383 case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL: 1384 case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL: 1385 case SM4_FILE_OUTPUT_COVERAGE_MASK: 1386 return getScratch(); 1387 case SM4_FILE_IMMEDIATE32: 1388 case SM4_FILE_IMMEDIATE64: 1389 case SM4_FILE_CONSTANT_BUFFER: 1390 case SM4_FILE_RESOURCE: 1391 case SM4_FILE_SAMPLER: 1392 case SM4_FILE_UNORDERED_ACCESS_VIEW: 1393 assert(!"invalid destination file"); 1394 return NULL; 1395 default: 1396 assert(!"invalid file"); 1397 return NULL; 1398 } 1399 } 1400 1401 void 1402 Converter::saveFragDepth(operation op, Value *value) 1403 { 1404 if (op == OP_MIN || op == OP_MAX) { 1405 Value *zIn; 1406 zIn = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_POSITION, 2)); 1407 value = mkOp2v(op, TYPE_F32, getSSA(), value, zIn); 1408 } 1409 oData.store(info.io.fragDepth, 2, NULL, value); 1410 } 1411 1412 void 1413 Converter::saveDst(const sm4_op &op, int c, Value *value, int s) 1414 { 1415 Symbol *sym; 1416 Instruction *st; 1417 int a, idx; 1418 1419 switch (op.file) { 1420 case SM4_FILE_TEMP: 1421 idx = op.indices[0].disp; 1422 tData32.store(idx, c, NULL, value); 1423 break; 1424 case SM4_FILE_INDEXABLE_TEMP: 1425 a = op.indices[0].disp; 1426 idx = op.indices[1].disp; 1427 // FIXME: shift is wrong, depends in lData 1428 lData[a].store(idx, c, getDstPtr(s, 1, 4), value); 1429 break; 1430 case SM4_FILE_OUTPUT: 1431 assert(op.num_indices == 1); 1432 idx = op.indices[0].disp; 1433 if (prog->getType() == Program::TYPE_FRAGMENT) { 1434 oData.store(idx, c, NULL, value); 1435 } else { 1436 if (phase) 1437 idx += info.numOutputs - info.numPatchConstants; 1438 const int shl = (info.out[idx].sn == NV50_SEMANTIC_TESSFACTOR) ? 2 : 4; 1439 sym = oSym(idx, c); 1440 if (sym->reg.file == FILE_SHADER_OUTPUT) 1441 st = mkStore(OP_EXPORT, dTy, sym, getDstPtr(s, 0, shl), value); 1442 else 1443 st = mkStore(OP_WRSV, dTy, sym, getDstPtr(s, 0, 2), value); 1444 st->perPatch = phase ? 1 : 0; 1445 } 1446 break; 1447 case SM4_FILE_OUTPUT_DEPTH_GREATER_EQUAL: 1448 saveFragDepth(OP_MAX, value); 1449 break; 1450 case SM4_FILE_OUTPUT_DEPTH_LESS_EQUAL: 1451 saveFragDepth(OP_MIN, value); 1452 break; 1453 case SM4_FILE_OUTPUT_DEPTH: 1454 saveFragDepth(OP_NOP, value); 1455 break; 1456 case SM4_FILE_OUTPUT_COVERAGE_MASK: 1457 oData.store(info.io.sampleMask, 0, NULL, value); 1458 break; 1459 case SM4_FILE_IMMEDIATE32: 1460 case SM4_FILE_IMMEDIATE64: 1461 case SM4_FILE_INPUT: 1462 case SM4_FILE_CONSTANT_BUFFER: 1463 case SM4_FILE_RESOURCE: 1464 case SM4_FILE_SAMPLER: 1465 assert(!"invalid destination file"); 1466 return; 1467 default: 1468 assert(!"invalid file"); 1469 return; 1470 } 1471 } 1472 1473 void 1474 Converter::emitTex(Value *dst0[4], TexInstruction *tex, const uint8_t swz[4]) 1475 { 1476 Value *res[4] = { NULL, NULL, NULL, NULL }; 1477 unsigned int c, d; 1478 1479 for (c = 0; c < 4; ++c) 1480 if (dst0[c]) 1481 tex->tex.mask |= 1 << swz[c]; 1482 for (d = 0, c = 0; c < 4; ++c) 1483 if (tex->tex.mask & (1 << c)) 1484 tex->setDef(d++, (res[c] = getScratch())); 1485 1486 bb->insertTail(tex); 1487 1488 if (insn->opcode == SM4_OPCODE_RESINFO) { 1489 if (tex->tex.target.getDim() == 1) { 1490 res[2] = loadImm(NULL, 0); 1491 if (!tex->tex.target.isArray()) 1492 res[1] = res[2]; 1493 } else 1494 if (tex->tex.target.getDim() == 2 && !tex->tex.target.isArray()) { 1495 res[2] = loadImm(NULL, 0); 1496 } 1497 for (c = 0; c < 4; ++c) { 1498 if (!dst0[c]) 1499 continue; 1500 Value *src = res[swz[c]]; 1501 assert(src); 1502 switch (insn->insn.resinfo_return_type) { 1503 case 0: 1504 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src); 1505 break; 1506 case 1: 1507 mkCvt(OP_CVT, TYPE_F32, dst0[c], TYPE_U32, src); 1508 if (swz[c] < tex->tex.target.getDim()) 1509 mkOp1(OP_RCP, TYPE_F32, dst0[c], dst0[c]); 1510 break; 1511 default: 1512 mkMov(dst0[c], src); 1513 break; 1514 } 1515 } 1516 } else { 1517 for (c = 0; c < 4; ++c) 1518 if (dst0[c]) 1519 mkMov(dst0[c], res[swz[c]]); 1520 } 1521 } 1522 1523 void 1524 Converter::handleQUERY(Value *dst0[4], enum TexQuery query) 1525 { 1526 TexInstruction *texi = new_TexInstruction(func, OP_TXQ); 1527 texi->tex.query = query; 1528 1529 assert(insn->ops[2]->file == SM4_FILE_RESOURCE); // TODO: UAVs 1530 1531 const int rOp = (query == TXQ_DIMS) ? 2 : 1; 1532 const int sOp = (query == TXQ_DIMS) ? 0 : 1; 1533 1534 const int tR = insn->ops[rOp]->indices[0].disp; 1535 1536 texi->setTexture(resourceType[tR][0], tR, 0); 1537 1538 texi->setSrc(0, src(sOp, 0)); // mip level or sample index 1539 1540 emitTex(dst0, texi, insn->ops[rOp]->swizzle); 1541 } 1542 1543 void 1544 Converter::handleLOAD(Value *dst0[4]) 1545 { 1546 TexInstruction *texi = new_TexInstruction(func, OP_TXF); 1547 unsigned int c; 1548 1549 const int tR = insn->ops[2]->indices[0].disp; 1550 1551 texi->setTexture(resourceType[tR][0], tR, 0); 1552 1553 for (c = 0; c < texi->tex.target.getArgCount(); ++c) 1554 texi->setSrc(c, src(0, c)); 1555 1556 if (texi->tex.target == TEX_TARGET_BUFFER) { 1557 texi->tex.levelZero = true; 1558 } else { 1559 texi->setSrc(c++, src(0, 3)); 1560 for (c = 0; c < 3; ++c) { 1561 texi->tex.offset[0][c] = insn->sample_offset[c]; 1562 if (texi->tex.offset[0][c]) 1563 texi->tex.useOffsets = 1; 1564 } 1565 } 1566 1567 emitTex(dst0, texi, insn->ops[2]->swizzle); 1568 } 1569 1570 // order of nv50 ir sources: x y z/layer lod/bias dc 1571 void 1572 Converter::handleSAMPLE(operation opr, Value *dst0[4]) 1573 { 1574 TexInstruction *texi = new_TexInstruction(func, opr); 1575 unsigned int c, s; 1576 Value *arg[4], *src0[4]; 1577 Value *val; 1578 Value *lod = NULL, *dc = NULL; 1579 1580 const int tR = insn->ops[2]->indices[0].disp; 1581 const int tS = insn->ops[3]->indices[0].disp; 1582 1583 TexInstruction::Target tgt = resourceType[tR][shadow[tS] ? 1 : 0]; 1584 1585 for (c = 0; c < tgt.getArgCount(); ++c) 1586 arg[c] = src0[c] = src(0, c); 1587 1588 if (insn->opcode == SM4_OPCODE_SAMPLE_L || 1589 insn->opcode == SM4_OPCODE_SAMPLE_B) { 1590 lod = src(3, 0); 1591 } else 1592 if (insn->opcode == SM4_OPCODE_SAMPLE_C || 1593 insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) { 1594 dc = src(3, 0); 1595 if (insn->opcode == SM4_OPCODE_SAMPLE_C_LZ) 1596 texi->tex.levelZero = true; 1597 } else 1598 if (insn->opcode == SM4_OPCODE_SAMPLE_D) { 1599 for (c = 0; c < tgt.getDim(); ++c) { 1600 texi->dPdx[c] = src(3, c); 1601 texi->dPdy[c] = src(4, c); 1602 } 1603 } 1604 1605 if (tgt.isCube()) { 1606 for (c = 0; c < 3; ++c) 1607 src0[c] = mkOp1v(OP_ABS, TYPE_F32, getSSA(), arg[c]); 1608 val = getScratch(); 1609 mkOp2(OP_MAX, TYPE_F32, val, src0[0], src0[1]); 1610 mkOp2(OP_MAX, TYPE_F32, val, src0[2], val); 1611 mkOp1(OP_RCP, TYPE_F32, val, val); 1612 for (c = 0; c < 3; ++c) 1613 src0[c] = mkOp2v(OP_MUL, TYPE_F32, getSSA(), arg[c], val); 1614 } 1615 1616 for (s = 0; s < tgt.getArgCount(); ++s) 1617 texi->setSrc(s, src0[s]); 1618 if (lod) 1619 texi->setSrc(s++, lod); 1620 if (dc) 1621 texi->setSrc(s++, dc); 1622 1623 for (c = 0; c < 3; ++c) { 1624 texi->tex.offset[0][c] = insn->sample_offset[c]; 1625 if (texi->tex.offset[0][c]) 1626 texi->tex.useOffsets = 1; 1627 } 1628 1629 texi->setTexture(tgt, tR, tS); 1630 1631 emitTex(dst0, texi, insn->ops[2]->swizzle); 1632 } 1633 1634 void 1635 Converter::handleDP(Value *dst0[4], int dim) 1636 { 1637 Value *src0 = src(0, 0), *src1 = src(1, 0); 1638 Value *dotp = getScratch(); 1639 1640 assert(dim > 0); 1641 1642 mkOp2(OP_MUL, TYPE_F32, dotp, src0, src1); 1643 for (int c = 1; c < dim; ++c) 1644 mkOp3(OP_MAD, TYPE_F32, dotp, src(0, c), src(1, c), dotp); 1645 1646 for (int c = 0; c < 4; ++c) 1647 dst0[c] = dotp; 1648 } 1649 1650 void 1651 Converter::insertConvergenceOps(BasicBlock *conv, BasicBlock *fork) 1652 { 1653 FlowInstruction *join = new_FlowInstruction(func, OP_JOIN, NULL); 1654 join->fixed = 1; 1655 conv->insertHead(join); 1656 1657 fork->joinAt = new_FlowInstruction(func, OP_JOINAT, conv); 1658 fork->insertBefore(fork->getExit(), fork->joinAt); 1659 } 1660 1661 void 1662 Converter::finalizeShader() 1663 { 1664 if (finalized) 1665 return; 1666 BasicBlock *epilogue = reinterpret_cast<BasicBlock *>(leaveBBs.pop().u.p); 1667 entryBBs.pop(); 1668 1669 finalized = true; 1670 1671 bb->cfg.attach(&epilogue->cfg, Graph::Edge::TREE); 1672 setPosition(epilogue, true); 1673 1674 if (prog->getType() == Program::TYPE_FRAGMENT) 1675 exportOutputs(); 1676 1677 mkOp(OP_EXIT, TYPE_NONE, NULL)->terminator = 1; 1678 } 1679 1680 #define FOR_EACH_DST0_ENABLED_CHANNEL32(chan) \ 1681 for ((chan) = 0; (chan) < 4; ++(chan)) \ 1682 if (insn->ops[0].get()->mask & (1 << (chan))) 1683 1684 #define FOR_EACH_DST0_ENABLED_CHANNEL64(chan) \ 1685 for ((chan) = 0; (chan) < 2; ++(chan)) \ 1686 if (insn->ops[0].get()->mask & (1 << (chan))) 1687 1688 bool 1689 Converter::checkDstSrcAliasing() const 1690 { 1691 for (unsigned int d = 0; d < nDstOpnds; ++d) { 1692 for (unsigned int s = nDstOpnds; s < insn->num_ops; ++s) { 1693 if (insn->ops[d]->file != insn->ops[s]->file) 1694 continue; 1695 int i = insn->ops[s]->num_indices - 1; 1696 if (i != insn->ops[d]->num_indices - 1) 1697 continue; 1698 if (insn->ops[d]->is_index_simple(i) && 1699 insn->ops[s]->is_index_simple(i) && 1700 insn->ops[d]->indices[i].disp == insn->ops[s]->indices[i].disp) 1701 return true; 1702 } 1703 } 1704 return false; 1705 } 1706 1707 bool 1708 Converter::handleInstruction(unsigned int pos) 1709 { 1710 Value *dst0[4], *rDst0[4]; 1711 Value *dst1[4], *rDst1[4]; 1712 int c, nc; 1713 1714 insn = sm4.insns[pos]; 1715 enum sm4_opcode opcode = static_cast<sm4_opcode>(insn->opcode); 1716 1717 operation op = cvtOpcode(opcode); 1718 1719 sTy = inferSrcType(opcode); 1720 dTy = inferDstType(opcode); 1721 1722 nc = dTy == TYPE_F64 ? 2 : 4; 1723 1724 nDstOpnds = getDstOpndCount(opcode); 1725 1726 bool useScratchDst = checkDstSrcAliasing(); 1727 1728 INFO("SM4_OPCODE_##%u, aliasing = %u\n", insn->opcode, useScratchDst); 1729 1730 if (nDstOpnds >= 1) { 1731 for (c = 0; c < nc; ++c) 1732 rDst0[c] = dst0[c] = 1733 insn->ops[0].get()->mask & (1 << c) ? dst(0, c) : NULL; 1734 if (useScratchDst) 1735 for (c = 0; c < nc; ++c) 1736 dst0[c] = rDst0[c] ? getScratch() : NULL; 1737 } 1738 1739 if (nDstOpnds >= 2) { 1740 for (c = 0; c < nc; ++c) 1741 rDst1[c] = dst1[c] = 1742 insn->ops[1].get()->mask & (1 << c) ? dst(1, c) : NULL; 1743 if (useScratchDst) 1744 for (c = 0; c < nc; ++c) 1745 dst1[c] = rDst1[c] ? getScratch() : NULL; 1746 } 1747 1748 switch (insn->opcode) { 1749 case SM4_OPCODE_ADD: 1750 case SM4_OPCODE_AND: 1751 case SM4_OPCODE_DIV: 1752 case SM4_OPCODE_IADD: 1753 case SM4_OPCODE_IMAX: 1754 case SM4_OPCODE_IMIN: 1755 case SM4_OPCODE_MIN: 1756 case SM4_OPCODE_MAX: 1757 case SM4_OPCODE_MUL: 1758 case SM4_OPCODE_OR: 1759 case SM4_OPCODE_UMAX: 1760 case SM4_OPCODE_UMIN: 1761 case SM4_OPCODE_XOR: 1762 FOR_EACH_DST0_ENABLED_CHANNEL32(c) { 1763 Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c)); 1764 if (dTy == TYPE_F32) 1765 insn->ftz = 1; 1766 } 1767 break; 1768 1769 case SM4_OPCODE_ISHL: 1770 case SM4_OPCODE_ISHR: 1771 case SM4_OPCODE_USHR: 1772 FOR_EACH_DST0_ENABLED_CHANNEL32(c) { 1773 Instruction *insn = mkOp2(op, dTy, dst0[c], src(0, c), src(1, c)); 1774 insn->subOp = NV50_IR_SUBOP_SHIFT_WRAP; 1775 } 1776 break; 1777 1778 case SM4_OPCODE_IMAD: 1779 case SM4_OPCODE_MAD: 1780 case SM4_OPCODE_UMAD: 1781 FOR_EACH_DST0_ENABLED_CHANNEL32(c) { 1782 mkOp3(OP_MAD, dTy, dst0[c], src(0, c), src(1, c), src(2, c)); 1783 } 1784 break; 1785 1786 case SM4_OPCODE_DADD: 1787 case SM4_OPCODE_DMAX: 1788 case SM4_OPCODE_DMIN: 1789 case SM4_OPCODE_DMUL: 1790 FOR_EACH_DST0_ENABLED_CHANNEL64(c) { 1791 mkOp2(op, dTy, dst0[c], src(0, c), src(1, c)); 1792 } 1793 break; 1794 1795 case SM4_OPCODE_UDIV: 1796 for (c = 0; c < 4; ++c) { 1797 Value *dvn, *dvs; 1798 if (dst0[c] || dst1[c]) { 1799 dvn = src(0, c); 1800 dvs = src(1, c); 1801 } 1802 if (dst0[c]) 1803 mkOp2(OP_DIV, TYPE_U32, dst0[c], dvn, dvs); 1804 if (dst1[c]) 1805 mkOp2(OP_MOD, TYPE_U32, dst1[c], dvn, dvs); 1806 } 1807 break; 1808 1809 case SM4_OPCODE_IMUL: 1810 case SM4_OPCODE_UMUL: 1811 for (c = 0; c < 4; ++c) { 1812 Value *a, *b; 1813 if (dst0[c] || dst1[c]) { 1814 a = src(0, c); 1815 b = src(1, c); 1816 } 1817 if (dst0[c]) 1818 mkOp2(OP_MUL, dTy, dst0[c], a, b)->subOp = 1819 NV50_IR_SUBOP_MUL_HIGH; 1820 if (dst1[c]) 1821 mkOp2(OP_MUL, dTy, dst1[c], a, b); 1822 } 1823 break; 1824 1825 case SM4_OPCODE_DP2: 1826 handleDP(dst0, 2); 1827 break; 1828 case SM4_OPCODE_DP3: 1829 handleDP(dst0, 3); 1830 break; 1831 case SM4_OPCODE_DP4: 1832 handleDP(dst0, 4); 1833 break; 1834 1835 case SM4_OPCODE_DERIV_RTX: 1836 case SM4_OPCODE_DERIV_RTX_COARSE: 1837 case SM4_OPCODE_DERIV_RTX_FINE: 1838 case SM4_OPCODE_DERIV_RTY: 1839 case SM4_OPCODE_DERIV_RTY_COARSE: 1840 case SM4_OPCODE_DERIV_RTY_FINE: 1841 case SM4_OPCODE_MOV: 1842 case SM4_OPCODE_INEG: 1843 case SM4_OPCODE_NOT: 1844 case SM4_OPCODE_SQRT: 1845 case SM4_OPCODE_COUNTBITS: 1846 case SM4_OPCODE_EXP: 1847 case SM4_OPCODE_LOG: 1848 case SM4_OPCODE_RCP: 1849 FOR_EACH_DST0_ENABLED_CHANNEL32(c) { 1850 mkOp1(op, dTy, dst0[c], src(0, c)); 1851 } 1852 break; 1853 1854 case SM4_OPCODE_FRC: 1855 FOR_EACH_DST0_ENABLED_CHANNEL32(c) { 1856 Value *val = getScratch(); 1857 Value *src0 = src(0, c); 1858 mkOp1(OP_FLOOR, TYPE_F32, val, src0); 1859 mkOp2(OP_SUB, TYPE_F32, dst0[c], src0, val); 1860 } 1861 break; 1862 1863 case SM4_OPCODE_MOVC: 1864 FOR_EACH_DST0_ENABLED_CHANNEL32(c) 1865 mkCmp(OP_SLCT, CC_NE, TYPE_U32, dst0[c], src(1, c), src(2, c), 1866 src(0, c)); 1867 break; 1868 1869 case SM4_OPCODE_ROUND_NE: 1870 case SM4_OPCODE_ROUND_NI: 1871 case SM4_OPCODE_ROUND_PI: 1872 case SM4_OPCODE_ROUND_Z: 1873 FOR_EACH_DST0_ENABLED_CHANNEL32(c) { 1874 Instruction *rnd = mkOp1(op, dTy, dst0[c], src(0, c)); 1875 rnd->ftz = 1; 1876 rnd->rnd = cvtRoundingMode(opcode); 1877 } 1878 break; 1879 1880 case SM4_OPCODE_RSQ: 1881 FOR_EACH_DST0_ENABLED_CHANNEL32(c) 1882 mkOp1(op, dTy, dst0[c], src(0, c)); 1883 break; 1884 1885 case SM4_OPCODE_SINCOS: 1886 for (c = 0; c < 4; ++c) { 1887 if (!dst0[c] && !dst1[c]) 1888 continue; 1889 Value *val = mkOp1v(OP_PRESIN, TYPE_F32, getScratch(), src(0, c)); 1890 if (dst0[c]) 1891 mkOp1(OP_SIN, TYPE_F32, dst0[c], val); 1892 if (dst1[c]) 1893 mkOp1(OP_COS, TYPE_F32, dst1[c], val); 1894 } 1895 break; 1896 1897 case SM4_OPCODE_EQ: 1898 case SM4_OPCODE_GE: 1899 case SM4_OPCODE_IEQ: 1900 case SM4_OPCODE_IGE: 1901 case SM4_OPCODE_ILT: 1902 case SM4_OPCODE_LT: 1903 case SM4_OPCODE_NE: 1904 case SM4_OPCODE_INE: 1905 case SM4_OPCODE_ULT: 1906 case SM4_OPCODE_UGE: 1907 case SM4_OPCODE_DEQ: 1908 case SM4_OPCODE_DGE: 1909 case SM4_OPCODE_DLT: 1910 case SM4_OPCODE_DNE: 1911 { 1912 CondCode cc = cvtCondCode(opcode); 1913 FOR_EACH_DST0_ENABLED_CHANNEL32(c) { 1914 CmpInstruction *set; 1915 set = mkCmp(op, cc, sTy, dst0[c], src(0, c), src(1, c), NULL); 1916 set->setType(dTy, sTy); 1917 if (sTy == TYPE_F32) 1918 set->ftz = 1; 1919 } 1920 } 1921 break; 1922 1923 case SM4_OPCODE_FTOI: 1924 case SM4_OPCODE_FTOU: 1925 FOR_EACH_DST0_ENABLED_CHANNEL32(c) 1926 mkCvt(op, dTy, dst0[c], sTy, src(0, c))->rnd = ROUND_Z; 1927 break; 1928 case SM4_OPCODE_ITOF: 1929 case SM4_OPCODE_UTOF: 1930 case SM4_OPCODE_F32TOF16: 1931 case SM4_OPCODE_F16TOF32: 1932 case SM4_OPCODE_DTOF: 1933 case SM4_OPCODE_FTOD: 1934 FOR_EACH_DST0_ENABLED_CHANNEL32(c) 1935 mkCvt(op, dTy, dst0[c], sTy, src(0, c)); 1936 break; 1937 1938 case SM4_OPCODE_CUT: 1939 case SM4_OPCODE_CUT_STREAM: 1940 mkOp1(OP_RESTART, TYPE_U32, NULL, mkImm(0))->fixed = 1; 1941 break; 1942 case SM4_OPCODE_EMIT: 1943 case SM4_OPCODE_EMIT_STREAM: 1944 mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0))->fixed = 1; 1945 break; 1946 case SM4_OPCODE_EMITTHENCUT: 1947 case SM4_OPCODE_EMITTHENCUT_STREAM: 1948 { 1949 Instruction *cut = mkOp1(OP_EMIT, TYPE_U32, NULL, mkImm(0)); 1950 cut->fixed = 1; 1951 cut->subOp = NV50_IR_SUBOP_EMIT_RESTART; 1952 } 1953 break; 1954 1955 case SM4_OPCODE_DISCARD: 1956 info.prop.fp.usesDiscard = TRUE; 1957 mkOp(OP_DISCARD, TYPE_NONE, NULL)->setPredicate( 1958 insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0)); 1959 break; 1960 1961 case SM4_OPCODE_CALL: 1962 case SM4_OPCODE_CALLC: 1963 assert(!"CALL/CALLC not implemented"); 1964 break; 1965 1966 case SM4_OPCODE_RET: 1967 // XXX: the following doesn't work with subroutines / early ret 1968 if (!haveNextPhase(pos)) 1969 finalizeShader(); 1970 else 1971 phaseEnded = phase + 1; 1972 break; 1973 1974 case SM4_OPCODE_IF: 1975 { 1976 BasicBlock *ifClause = new BasicBlock(func); 1977 1978 bb->cfg.attach(&ifClause->cfg, Graph::Edge::TREE); 1979 condBBs.push(bb); 1980 joinBBs.push(bb); 1981 1982 mkFlow(OP_BRA, NULL, insn->insn.test_nz ? CC_NOT_P : CC_P, src(0, 0)); 1983 1984 setPosition(ifClause, true); 1985 } 1986 break; 1987 case SM4_OPCODE_ELSE: 1988 { 1989 BasicBlock *elseClause = new BasicBlock(func); 1990 BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); 1991 1992 forkPoint->cfg.attach(&elseClause->cfg, Graph::Edge::TREE); 1993 condBBs.push(bb); 1994 1995 forkPoint->getExit()->asFlow()->target.bb = elseClause; 1996 if (!bb->isTerminated()) 1997 mkFlow(OP_BRA, NULL, CC_ALWAYS, NULL); 1998 1999 setPosition(elseClause, true); 2000 } 2001 break; 2002 case SM4_OPCODE_ENDIF: 2003 { 2004 BasicBlock *convPoint = new BasicBlock(func); 2005 BasicBlock *lastBB = reinterpret_cast<BasicBlock *>(condBBs.pop().u.p); 2006 BasicBlock *forkPoint = reinterpret_cast<BasicBlock *>(joinBBs.pop().u.p); 2007 2008 if (!bb->isTerminated()) { 2009 // we only want join if none of the clauses ended with CONT/BREAK/RET 2010 if (lastBB->getExit()->op == OP_BRA && joinBBs.getSize() < 6) 2011 insertConvergenceOps(convPoint, forkPoint); 2012 mkFlow(OP_BRA, convPoint, CC_ALWAYS, NULL); 2013 bb->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD); 2014 } 2015 2016 if (lastBB->getExit()->op == OP_BRA) { 2017 lastBB->cfg.attach(&convPoint->cfg, Graph::Edge::FORWARD); 2018 lastBB->getExit()->asFlow()->target.bb = convPoint; 2019 } 2020 setPosition(convPoint, true); 2021 } 2022 break; 2023 2024 case SM4_OPCODE_SWITCH: 2025 case SM4_OPCODE_CASE: 2026 case SM4_OPCODE_ENDSWITCH: 2027 assert(!"SWITCH/CASE/ENDSWITCH not implemented"); 2028 break; 2029 2030 case SM4_OPCODE_LOOP: 2031 { 2032 BasicBlock *loopHeader = new BasicBlock(func); 2033 BasicBlock *loopBreak = new BasicBlock(func); 2034 2035 loopBBs.push(loopHeader); 2036 breakBBs.push(loopBreak); 2037 if (loopBBs.getSize() > func->loopNestingBound) 2038 func->loopNestingBound++; 2039 2040 mkFlow(OP_PREBREAK, loopBreak, CC_ALWAYS, NULL); 2041 2042 bb->cfg.attach(&loopHeader->cfg, Graph::Edge::TREE); 2043 setPosition(loopHeader, true); 2044 mkFlow(OP_PRECONT, loopHeader, CC_ALWAYS, NULL); 2045 } 2046 break; 2047 case SM4_OPCODE_ENDLOOP: 2048 { 2049 BasicBlock *loopBB = reinterpret_cast<BasicBlock *>(loopBBs.pop().u.p); 2050 2051 if (!bb->isTerminated()) { 2052 mkFlow(OP_CONT, loopBB, CC_ALWAYS, NULL); 2053 bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK); 2054 } 2055 setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true); 2056 } 2057 break; 2058 case SM4_OPCODE_BREAK: 2059 { 2060 if (bb->isTerminated()) 2061 break; 2062 BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p); 2063 mkFlow(OP_BREAK, breakBB, CC_ALWAYS, NULL); 2064 bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS); 2065 } 2066 break; 2067 case SM4_OPCODE_BREAKC: 2068 { 2069 BasicBlock *nextBB = new BasicBlock(func); 2070 BasicBlock *breakBB = reinterpret_cast<BasicBlock *>(breakBBs.peek().u.p); 2071 CondCode cc = insn->insn.test_nz ? CC_P : CC_NOT_P; 2072 mkFlow(OP_BREAK, breakBB, cc, src(0, 0)); 2073 bb->cfg.attach(&breakBB->cfg, Graph::Edge::CROSS); 2074 bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD); 2075 setPosition(nextBB, true); 2076 } 2077 break; 2078 case SM4_OPCODE_CONTINUE: 2079 { 2080 if (bb->isTerminated()) 2081 break; 2082 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p); 2083 mkFlow(OP_CONT, contBB, CC_ALWAYS, NULL); 2084 contBB->explicitCont = true; 2085 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); 2086 } 2087 break; 2088 case SM4_OPCODE_CONTINUEC: 2089 { 2090 BasicBlock *nextBB = new BasicBlock(func); 2091 BasicBlock *contBB = reinterpret_cast<BasicBlock *>(loopBBs.peek().u.p); 2092 mkFlow(OP_CONT, contBB, insn->insn.test_nz ? CC_P : CC_NOT_P, src(0, 0)); 2093 bb->cfg.attach(&contBB->cfg, Graph::Edge::BACK); 2094 bb->cfg.attach(&nextBB->cfg, Graph::Edge::FORWARD); 2095 setPosition(nextBB, true); 2096 } 2097 break; 2098 2099 case SM4_OPCODE_SAMPLE: 2100 case SM4_OPCODE_SAMPLE_C: 2101 case SM4_OPCODE_SAMPLE_C_LZ: 2102 case SM4_OPCODE_SAMPLE_L: 2103 case SM4_OPCODE_SAMPLE_D: 2104 case SM4_OPCODE_SAMPLE_B: 2105 handleSAMPLE(op, dst0); 2106 break; 2107 case SM4_OPCODE_LD: 2108 case SM4_OPCODE_LD_MS: 2109 handleLOAD(dst0); 2110 break; 2111 2112 case SM4_OPCODE_GATHER4: 2113 assert(!"GATHER4 not implemented\n"); 2114 break; 2115 2116 case SM4_OPCODE_RESINFO: 2117 handleQUERY(dst0, TXQ_DIMS); 2118 break; 2119 case SM4_OPCODE_SAMPLE_POS: 2120 handleQUERY(dst0, TXQ_SAMPLE_POSITION); 2121 break; 2122 2123 case SM4_OPCODE_NOP: 2124 mkOp(OP_NOP, TYPE_NONE, NULL); 2125 break; 2126 2127 case SM4_OPCODE_HS_DECLS: 2128 // XXX: any significance ? 2129 break; 2130 case SM4_OPCODE_HS_CONTROL_POINT_PHASE: 2131 phase = 0; 2132 break; 2133 case SM4_OPCODE_HS_FORK_PHASE: 2134 if (phase != 1) 2135 subPhase = 0; 2136 phase = 1; 2137 phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0; 2138 phaseStart = pos; 2139 if (info.prop.tp.outputPatchSize < phaseInstCnt[0][subPhase]) 2140 unrollPhase = true; 2141 break; 2142 case SM4_OPCODE_HS_JOIN_PHASE: 2143 if (phase != 2) 2144 subPhase = 0; 2145 phase = 2; 2146 phaseInstance = (phaseStart == pos) ? (phaseInstance + 1) : 0; 2147 phaseStart = pos; 2148 if (info.prop.tp.outputPatchSize < phaseInstCnt[1][subPhase]) 2149 unrollPhase = true; 2150 break; 2151 2152 default: 2153 ERROR("SM4_OPCODE_#%u illegal / not supported\n", insn->opcode); 2154 abort(); 2155 return false; 2156 } 2157 2158 for (c = 0; c < nc; ++c) { 2159 if (nDstOpnds >= 1 && rDst0[c]) { 2160 if (dst0[c] != rDst0[c]) 2161 mkMov(rDst0[c], dst0[c]); 2162 saveDst(0, c, rDst0[c]); 2163 } 2164 if (nDstOpnds >= 2 && rDst1[c]) { 2165 if (dst1[c] != rDst1[c]) 2166 mkMov(rDst1[c], dst1[c]); 2167 saveDst(1, c, rDst1[c]); 2168 } 2169 } 2170 2171 memset(srcPtr, 0, sizeof(srcPtr)); 2172 memset(dstPtr, 0, sizeof(dstPtr)); 2173 memset(vtxBase, 0, sizeof(vtxBase)); 2174 return true; 2175 } 2176 2177 void 2178 Converter::exportOutputs() 2179 { 2180 for (int i = 0; i < info.numOutputs; ++i) { 2181 for (int c = 0; c < 4; ++c) { 2182 if (!oData.exists(i, c)) 2183 continue; 2184 Symbol *sym = mkSymbol(FILE_SHADER_OUTPUT, 0, TYPE_F32, 2185 info.out[i].slot[c] * 4); 2186 Value *val = oData.load(i, c, NULL); 2187 if (val) 2188 mkStore(OP_EXPORT, TYPE_F32, sym, NULL, val); 2189 } 2190 } 2191 } 2192 2193 Converter::Converter(Program *p, struct nv50_ir_prog_info *s) 2194 : tData32(this), 2195 tData64(this), 2196 oData(this), 2197 info(*s), 2198 sm4(*reinterpret_cast<const sm4_program *>(s->bin.source)), 2199 prog(p) 2200 { 2201 memset(srcPtr, 0, sizeof(srcPtr)); 2202 memset(dstPtr, 0, sizeof(dstPtr)); 2203 memset(vtxBase, 0, sizeof(vtxBase)); 2204 2205 memset(interpMode, 0, sizeof(interpMode)); 2206 2207 nrRegVals = nrArrays = arrayVol = 0; 2208 2209 for (phase = 3; phase > 0; --phase) 2210 for (unsigned int i = 0; i < PIPE_MAX_SHADER_OUTPUTS; ++i) 2211 out[phase - 1][i].sn = TGSI_SEMANTIC_COUNT; 2212 2213 unrollPhase = false; 2214 phaseStart = 0; 2215 subPhaseCnt[0] = subPhaseCnt[1] = 0; 2216 } 2217 2218 Converter::~Converter() 2219 { 2220 if (lData) 2221 delete[] lData; 2222 2223 if (subPhaseCnt[0]) 2224 delete[] phaseInstCnt[0]; 2225 if (subPhaseCnt[1]) 2226 delete[] phaseInstCnt[1]; 2227 } 2228 2229 bool 2230 Converter::haveNextPhase(unsigned int pos) const 2231 { 2232 ++pos; 2233 return (pos < sm4.insns.size()) && 2234 (sm4.insns[pos]->opcode == SM4_OPCODE_HS_FORK_PHASE || 2235 sm4.insns[pos]->opcode == SM4_OPCODE_HS_JOIN_PHASE); 2236 } 2237 2238 bool 2239 Converter::run() 2240 { 2241 parseSignature(); 2242 2243 for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos) 2244 inspectDeclaration(*sm4.dcls[pos]); 2245 2246 phaseInstCnt[0] = new unsigned int [subPhaseCnt[0]]; 2247 phaseInstCnt[1] = new unsigned int [subPhaseCnt[1]]; 2248 for (int i = 0; i < subPhaseCnt[0]; ++i) 2249 phaseInstCnt[0][i] = -1; 2250 for (int i = 0; i < subPhaseCnt[1]; ++i) 2251 phaseInstCnt[1][i] = -1; 2252 // re-increased in handleDeclaration: 2253 subPhaseCnt[0] = subPhaseCnt[1] = 0; 2254 2255 allocateValues(); 2256 nrArrays = 0; 2257 for (unsigned int pos = 0; pos < sm4.dcls.size(); ++pos) 2258 handleDeclaration(*sm4.dcls[pos]); 2259 2260 info.io.genUserClip = -1; // no UCPs permitted with SM4 shaders 2261 info.io.clipDistanceMask = (1 << info.io.clipDistanceMask) - 1; 2262 2263 info.assignSlots(&info); 2264 2265 if (sm4.dcls.size() == 0 && sm4.insns.size() == 0) 2266 return true; 2267 2268 BasicBlock *entry = new BasicBlock(prog->main); 2269 BasicBlock *leave = new BasicBlock(prog->main); 2270 2271 prog->main->setEntry(entry); 2272 prog->main->setExit(leave); 2273 2274 setPosition(entry, true); 2275 2276 entryBBs.push(entry); 2277 leaveBBs.push(leave); 2278 2279 if (prog->getType() == Program::TYPE_FRAGMENT) { 2280 Symbol *sv = mkSysVal(SV_POSITION, 3); 2281 fragCoord[3] = mkOp1v(OP_RDSV, TYPE_F32, getSSA(), sv); 2282 mkOp1(OP_RCP, TYPE_F32, fragCoord[3], fragCoord[3]); 2283 } else 2284 if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) { 2285 const int n = (info.prop.tp.domain == PIPE_PRIM_TRIANGLES) ? 3 : 2; 2286 int c; 2287 for (c = 0; c < n; ++c) 2288 domainPt[c] = 2289 mkOp1v(OP_RDSV, TYPE_F32, getSSA(), mkSysVal(SV_TESS_COORD, c)); 2290 if (c == 2) 2291 domainPt[2] = loadImm(NULL, 0.0f); 2292 } 2293 2294 finalized = false; 2295 phaseEnded = 0; 2296 phase = 0; 2297 subPhase = 0; 2298 for (unsigned int pos = 0; pos < sm4.insns.size(); ++pos) { 2299 handleInstruction(pos); 2300 if (likely(phase == 0) || (phaseEnded < 2)) 2301 continue; 2302 phaseEnded = 0; 2303 if (!unrollPhase || !phaseInstanceUsed) { 2304 ++subPhase; 2305 continue; 2306 } 2307 phaseInstanceUsed = false; 2308 if (phaseInstance < (phaseInstCnt[phase - 1][subPhase] - 1)) 2309 pos = phaseStart - 1; 2310 else 2311 ++subPhase; 2312 } 2313 finalizeShader(); 2314 2315 return true; 2316 } 2317 2318 } // anonymous namespace 2319 2320 namespace nv50_ir { 2321 2322 bool 2323 Program::makeFromSM4(struct nv50_ir_prog_info *info) 2324 { 2325 Converter bld(this, info); 2326 return bld.run(); 2327 } 2328 2329 } // namespace nv50_ir 2330