1 /************************************************************************** 2 * 3 * Copyright 2010 Luca Barbieri 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining 6 * a copy of this software and associated documentation files (the 7 * "Software"), to deal in the Software without restriction, including 8 * without limitation the rights to use, copy, modify, merge, publish, 9 * distribute, sublicense, and/or sell copies of the Software, and to 10 * permit persons to whom the Software is furnished to do so, subject to 11 * the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the 14 * next paragraph) shall be included in all copies or substantial 15 * portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 * 25 **************************************************************************/ 26 27 #include <d3d11shader.h> 28 #include "d3d1xstutil.h" 29 #include "sm4.h" 30 #include "tgsi/tgsi_ureg.h" 31 #include <vector> 32 33 #if 1 34 #define check(x) assert(x) 35 #define fail(x) assert(0 && (x)) 36 #else 37 #define check(x) do {if(!(x)) throw(#x);} while(0) 38 #define fail(x) throw(x) 39 #endif 40 41 struct tgsi_interpolation 42 { 43 unsigned interpolation; 44 bool centroid; 45 }; 46 47 static tgsi_interpolation sm4_to_pipe_interpolation[] = 48 { 49 {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* UNDEFINED */ 50 {TGSI_INTERPOLATE_CONSTANT, false}, 51 {TGSI_INTERPOLATE_PERSPECTIVE, false}, /* LINEAR */ 52 {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_CENTROID */ 53 {TGSI_INTERPOLATE_LINEAR, false}, /* LINEAR_NOPERSPECTIVE */ 54 {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_CENTROID */ 55 56 // Added in D3D10.1 57 {TGSI_INTERPOLATE_PERSPECTIVE, true}, /* LINEAR_SAMPLE */ 58 {TGSI_INTERPOLATE_LINEAR, true}, /* LINEAR_NOPERSPECTIVE_SAMPLE */ 59 }; 60 61 static int sm4_to_pipe_sv[] = 62 { 63 -1, 64 TGSI_SEMANTIC_POSITION, 65 -1, /*TGSI_SEMANTIC_CLIP_DISTANCE */ 66 -1, /*TGSI_SEMANTIC_CULL_DISTANCE */ 67 -1, /*TGSI_SEMANTIC_RENDER_TARGET_ARRAY_INDEX */ 68 -1, /*TGSI_SEMANTIC_VIEWPORT_ARRAY_INDEX */ 69 -1, /*TGSI_SEMANTIC_VERTEXID,*/ 70 TGSI_SEMANTIC_PRIMID, 71 TGSI_SEMANTIC_INSTANCEID, 72 TGSI_SEMANTIC_FACE, 73 -1, /*TGSI_SEMANTIC_SAMPLE_INDEX*/ 74 }; 75 76 struct sm4_to_tgsi_converter 77 { 78 struct ureg_program* ureg; 79 std::vector<struct ureg_dst> temps; 80 std::vector<struct ureg_dst> outputs; 81 std::vector<struct ureg_src> inputs; 82 std::vector<struct ureg_src> resources; 83 std::vector<struct ureg_src> samplers; 84 std::vector<std::pair<unsigned, unsigned> > targets; // first is normal, second shadow/comparison 85 std::vector<unsigned> sampler_modes; // 0 = normal, 1 = shadow/comparison 86 std::vector<std::pair<unsigned, unsigned> > loops; 87 sm4_insn* insn; 88 struct sm4_program& program; 89 std::vector<unsigned> sm4_to_tgsi_insn_num; 90 std::vector<std::pair<unsigned, unsigned> > label_to_sm4_insn_num; 91 bool in_sub; 92 bool avoid_txf; 93 bool avoid_int; 94 95 sm4_to_tgsi_converter(struct sm4_program& program) 96 : program(program) 97 { 98 avoid_txf = true; 99 avoid_int = false; 100 } 101 102 struct ureg_dst _reg(sm4_op& op) 103 { 104 switch(op.file) 105 { 106 case SM4_FILE_NULL: 107 { 108 struct ureg_dst d; 109 memset(&d, 0, sizeof(d)); 110 d.File = TGSI_FILE_NULL; 111 return d; 112 } 113 case SM4_FILE_TEMP: 114 check(op.has_simple_index()); 115 check(op.indices[0].disp < temps.size()); 116 return temps[op.indices[0].disp]; 117 case SM4_FILE_OUTPUT: 118 check(op.has_simple_index()); 119 check(op.indices[0].disp < outputs.size()); 120 return outputs[op.indices[0].disp]; 121 default: 122 check(0); 123 return ureg_dst_undef(); 124 } 125 } 126 127 struct ureg_dst _dst(unsigned i = 0) 128 { 129 check(i < insn->num_ops); 130 sm4_op& op = *insn->ops[i]; 131 check(op.mode == SM4_OPERAND_MODE_MASK || op.mode == SM4_OPERAND_MODE_SCALAR); 132 struct ureg_dst d = ureg_writemask(_reg(op), op.mask); 133 if(insn->insn.sat) 134 d = ureg_saturate(d); 135 return d; 136 } 137 138 struct ureg_src _src(unsigned i) 139 { 140 check(i < insn->num_ops); 141 sm4_op& op = *insn->ops[i]; 142 struct ureg_src s; 143 switch(op.file) 144 { 145 case SM4_FILE_IMMEDIATE32: 146 s = ureg_imm4f(ureg, op.imm_values[0].f32, op.imm_values[1].f32, op.imm_values[2].f32, op.imm_values[3].f32); 147 break; 148 case SM4_FILE_INPUT: 149 check(op.is_index_simple(0)); 150 check(op.num_indices == 1 || op.num_indices == 2); 151 // TODO: is this correct, or are incorrectly swapping the two indices in the GS case? 152 check(op.indices[op.num_indices - 1].disp < inputs.size()); 153 s = inputs[op.indices[op.num_indices - 1].disp]; 154 if(op.num_indices == 2) 155 { 156 s.Dimension = 1; 157 s.DimensionIndex = op.indices[0].disp; 158 } 159 break; 160 case SM4_FILE_CONSTANT_BUFFER: 161 // TODO: indirect addressing 162 check(op.num_indices == 2); 163 check(op.is_index_simple(0)); 164 check(op.is_index_simple(1)); 165 s = ureg_src_register(TGSI_FILE_CONSTANT, (unsigned)op.indices[1].disp); 166 s.Dimension = 1; 167 s.DimensionIndex = op.indices[0].disp; 168 break; 169 default: 170 s = ureg_src(_reg(op)); 171 break; 172 } 173 if(op.mode == SM4_OPERAND_MODE_SWIZZLE || op.mode == SM4_OPERAND_MODE_SCALAR) 174 s = ureg_swizzle(s, op.swizzle[0], op.swizzle[1], op.swizzle[2], op.swizzle[3]); 175 else 176 { 177 /* immediates are masked to show needed values */ 178 check(op.file == SM4_FILE_IMMEDIATE32 || op.file == SM4_FILE_IMMEDIATE64); 179 } 180 if(op.abs) 181 s = ureg_abs(s); 182 if(op.neg) 183 s = ureg_negate(s); 184 return s; 185 }; 186 187 int _idx(sm4_file file, unsigned i = 0) 188 { 189 check(i < insn->num_ops); 190 sm4_op& op = *insn->ops[i]; 191 check(op.file == file); 192 check(op.has_simple_index()); 193 return (int)op.indices[0].disp; 194 } 195 196 unsigned tex_target(unsigned resource, unsigned sampler) 197 { 198 unsigned shadow = sampler_modes[sampler]; 199 unsigned target = shadow ? targets[resource].second : targets[resource].first; 200 check(target); 201 return target; 202 } 203 204 enum pipe_type res_return_type(unsigned type) 205 { 206 switch(type) 207 { 208 case D3D_RETURN_TYPE_UNORM: return PIPE_TYPE_UNORM; 209 case D3D_RETURN_TYPE_SNORM: return PIPE_TYPE_SNORM; 210 case D3D_RETURN_TYPE_SINT: return PIPE_TYPE_SINT; 211 case D3D_RETURN_TYPE_UINT: return PIPE_TYPE_UINT; 212 case D3D_RETURN_TYPE_FLOAT: return PIPE_TYPE_FLOAT; 213 default: 214 fail("invalid resource return type"); 215 return PIPE_TYPE_FLOAT; 216 } 217 } 218 219 std::vector<struct ureg_dst> insn_tmps; 220 221 struct ureg_dst _tmp() 222 { 223 struct ureg_dst t = ureg_DECL_temporary(ureg); 224 insn_tmps.push_back(t); 225 return t; 226 } 227 228 struct ureg_dst _tmp(struct ureg_dst d) 229 { 230 if(d.File == TGSI_FILE_TEMPORARY) 231 return d; 232 else 233 return ureg_writemask(_tmp(), d.WriteMask); 234 } 235 236 #define OP1_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1)); break 237 #define OP2_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2)); break 238 #define OP3_(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, _dst(), _src(1), _src(2), _src(3)); break 239 #define OP1(n) OP1_(n, n) 240 #define OP2(n) OP2_(n, n) 241 #define OP3(n) OP3_(n, n) 242 #define OP_CF(d, g) case SM4_OPCODE_##d: ureg_##g(ureg, &label); label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); break; 243 244 void translate_insns(unsigned begin, unsigned end) 245 { 246 for(unsigned insn_num = begin; insn_num < end; ++insn_num) 247 { 248 sm4_to_tgsi_insn_num[insn_num] = ureg_get_instruction_number(ureg); 249 unsigned label; 250 insn = program.insns[insn_num]; 251 bool ok; 252 ok = true; 253 switch(insn->opcode) 254 { 255 // trivial instructions 256 case SM4_OPCODE_NOP: 257 break; 258 OP1(MOV); 259 260 // float 261 OP2(ADD); 262 OP2(MUL); 263 OP3(MAD); 264 OP2(DIV); 265 OP1(FRC); 266 OP1(RCP); 267 OP2(MIN); 268 OP2(MAX); 269 OP2_(LT, SLT); 270 OP2_(GE, SGE); 271 OP2_(EQ, SEQ); 272 OP2_(NE, SNE); 273 274 // bitwise 275 OP1(NOT); 276 OP2(AND); 277 OP2(OR); 278 OP2(XOR); 279 280 // special mathematical 281 OP2(DP2); 282 OP2(DP3); 283 OP2(DP4); 284 OP1(RSQ); 285 OP1_(LOG, LG2); 286 OP1_(EXP, EX2); 287 288 // rounding 289 OP1_(ROUND_NE, ROUND); 290 OP1_(ROUND_Z, TRUNC); 291 OP1_(ROUND_PI, CEIL); 292 OP1_(ROUND_NI, FLR); 293 294 // cross-thread 295 OP1_(DERIV_RTX, DDX); 296 OP1_(DERIV_RTX_COARSE, DDX); 297 OP1_(DERIV_RTX_FINE, DDX); 298 OP1_(DERIV_RTY, DDY); 299 OP1_(DERIV_RTY_COARSE, DDY); 300 OP1_(DERIV_RTY_FINE, DDY); 301 case SM4_OPCODE_EMIT: 302 ureg_EMIT(ureg); 303 break; 304 case SM4_OPCODE_CUT: 305 ureg_ENDPRIM(ureg); 306 break; 307 case SM4_OPCODE_EMITTHENCUT: 308 ureg_EMIT(ureg); 309 ureg_ENDPRIM(ureg); 310 break; 311 312 // non-trivial instructions 313 case SM4_OPCODE_MOVC: 314 /* CMP checks for < 0, but MOVC checks for != 0 315 * but fortunately, x != 0 is equivalent to -abs(x) < 0 316 * XXX: can test_nz apply to this?! 317 */ 318 ureg_CMP(ureg, _dst(), ureg_negate(ureg_abs(_src(1))), _src(2), _src(3)); 319 break; 320 case SM4_OPCODE_SQRT: 321 { 322 struct ureg_dst d = _dst(); 323 struct ureg_dst t = _tmp(d); 324 ureg_RSQ(ureg, t, _src(1)); 325 ureg_RCP(ureg, d, ureg_src(t)); 326 break; 327 } 328 case SM4_OPCODE_SINCOS: 329 { 330 struct ureg_dst s = _dst(0); 331 struct ureg_dst c = _dst(1); 332 struct ureg_src v = _src(2); 333 if(s.File != TGSI_FILE_NULL) 334 ureg_SIN(ureg, s, v); 335 if(c.File != TGSI_FILE_NULL) 336 ureg_COS(ureg, c, v); 337 break; 338 } 339 340 // control flow 341 case SM4_OPCODE_DISCARD: 342 ureg_KIL(ureg, _src(0)); 343 break; 344 OP_CF(LOOP, BGNLOOP); 345 OP_CF(ENDLOOP, ENDLOOP); 346 case SM4_OPCODE_BREAK: 347 ureg_BRK(ureg); 348 break; 349 case SM4_OPCODE_BREAKC: 350 // XXX: can test_nz apply to this?! 351 ureg_BREAKC(ureg, _src(0)); 352 break; 353 case SM4_OPCODE_CONTINUE: 354 ureg_CONT(ureg); 355 break; 356 case SM4_OPCODE_CONTINUEC: 357 // XXX: can test_nz apply to this?! 358 ureg_IF(ureg, _src(0), &label); 359 ureg_CONT(ureg); 360 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 361 ureg_ENDIF(ureg); 362 break; 363 case SM4_OPCODE_SWITCH: 364 ureg_SWITCH(ureg, _src(0)); 365 break; 366 case SM4_OPCODE_CASE: 367 ureg_CASE(ureg, _src(0)); 368 break; 369 case SM4_OPCODE_DEFAULT: 370 ureg_DEFAULT(ureg); 371 break; 372 case SM4_OPCODE_ENDSWITCH: 373 ureg_ENDSWITCH(ureg); 374 break; 375 case SM4_OPCODE_CALL: 376 ureg_CAL(ureg, &label); 377 label_to_sm4_insn_num.push_back(std::make_pair(label, program.label_to_insn_num[_idx(SM4_FILE_LABEL)])); 378 break; 379 case SM4_OPCODE_LABEL: 380 if(in_sub) 381 ureg_ENDSUB(ureg); 382 else 383 ureg_END(ureg); 384 ureg_BGNSUB(ureg); 385 in_sub = true; 386 break; 387 case SM4_OPCODE_RET: 388 if(in_sub || insn_num != (program.insns.size() - 1)) 389 ureg_RET(ureg); 390 break; 391 case SM4_OPCODE_RETC: 392 ureg_IF(ureg, _src(0), &label); 393 if(insn->insn.test_nz) 394 ureg_RET(ureg); 395 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 396 if(!insn->insn.test_nz) 397 { 398 ureg_ELSE(ureg, &label); 399 ureg_RET(ureg); 400 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 401 } 402 ureg_ENDIF(ureg); 403 break; 404 OP_CF(ELSE, ELSE); 405 case SM4_OPCODE_ENDIF: 406 ureg_ENDIF(ureg); 407 break; 408 case SM4_OPCODE_IF: 409 if(insn->insn.test_nz) 410 { 411 ureg_IF(ureg, _src(0), &label); 412 label_to_sm4_insn_num.push_back(std::make_pair(label, program.cf_insn_linked[insn_num])); 413 } 414 else 415 { 416 unsigned linked = program.cf_insn_linked[insn_num]; 417 if(program.insns[linked]->opcode == SM4_OPCODE_ENDIF) 418 { 419 ureg_IF(ureg, _src(0), &label); 420 ureg_fixup_label(ureg, label, ureg_get_instruction_number(ureg)); 421 ureg_ELSE(ureg, &label); 422 label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); 423 } 424 else 425 { 426 /* we have to swap the branches in this case (fun!) 427 * TODO: maybe just emit a SEQ 0? 428 * */ 429 unsigned endif = program.cf_insn_linked[linked]; 430 431 ureg_IF(ureg, _src(0), &label); 432 label_to_sm4_insn_num.push_back(std::make_pair(label, linked)); 433 434 translate_insns(linked + 1, endif); 435 436 sm4_to_tgsi_insn_num[linked] = ureg_get_instruction_number(ureg); 437 ureg_ELSE(ureg, &label); 438 label_to_sm4_insn_num.push_back(std::make_pair(label, endif)); 439 440 translate_insns(insn_num + 1, linked); 441 442 insn_num = endif - 1; 443 goto next; 444 } 445 } 446 break; 447 case SM4_OPCODE_RESINFO: 448 // TODO: return type 449 ureg_SVIEWINFO(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]); 450 break; 451 // TODO: sample index, texture offset 452 case SM4_OPCODE_LD: // dst, coord_int, res; mipmap level in last coord_int arg 453 ureg_LOAD(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]); 454 break; 455 case SM4_OPCODE_LD_MS: 456 ureg_LOAD_MS(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)]); 457 break; 458 case SM4_OPCODE_SAMPLE: // dst, coord, res, samp 459 ureg_SAMPLE(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]); 460 break; 461 case SM4_OPCODE_SAMPLE_B: // dst, coord, res, samp, bias.x 462 ureg_SAMPLE_B(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4)); 463 break; 464 case SM4_OPCODE_SAMPLE_C: // dst, coord, res, samp, comp.x 465 ureg_SAMPLE_C(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4)); 466 break; 467 case SM4_OPCODE_SAMPLE_C_LZ: // dst, coord, res, samp, comp.x 468 ureg_SAMPLE_C_LZ(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4)); 469 break; 470 case SM4_OPCODE_SAMPLE_D: // dst, coord, res, samp, ddx, ddy 471 ureg_SAMPLE_D(ureg, _dst(), _src(1), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)], _src(4), _src(5)); 472 break; 473 case SM4_OPCODE_SAMPLE_L: // dst, coord, res, samp, bias.x 474 { 475 struct ureg_dst tmp = _tmp(); 476 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), _src(1)); 477 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_swizzle(_src(4), 0, 0, 0, 0)); 478 ureg_SAMPLE_L(ureg, _dst(), ureg_src(tmp), resources[_idx(SM4_FILE_RESOURCE, 2)], samplers[_idx(SM4_FILE_SAMPLER, 3)]); 479 break; 480 } 481 default: 482 ok = false; 483 break; 484 } 485 486 if(!ok && !avoid_int) 487 { 488 ok = true; 489 switch(insn->opcode) 490 { 491 // integer 492 OP1_(ITOF, I2F); 493 OP1_(FTOI, F2I); 494 OP2_(IADD, UADD); 495 OP1(INEG); 496 OP2_(IMUL, UMUL); 497 OP3_(IMAD, UMAD); 498 OP2_(ISHL, SHL); 499 OP2_(ISHR, ISHR); 500 OP2(IMIN); 501 OP2(IMAX); 502 OP2_(ILT, ISLT); 503 OP2_(IGE, ISGE); 504 OP2_(IEQ, USEQ); 505 OP2_(INE, USNE); 506 507 // unsigned 508 OP1_(UTOF, U2F); 509 OP1_(FTOU, F2U); 510 OP2(UMUL); 511 OP3(UMAD); 512 OP2(UMIN); 513 OP2(UMAX); 514 OP2_(ULT, USLT); 515 OP2_(UGE, USGE); 516 OP2(USHR); 517 518 case SM4_OPCODE_UDIV: 519 { 520 struct ureg_dst q = _dst(0); 521 struct ureg_dst r = _dst(1); 522 struct ureg_src a = _src(2); 523 struct ureg_src b = _src(3); 524 if(q.File != TGSI_FILE_NULL) 525 ureg_UDIV(ureg, q, a, b); 526 if(r.File != TGSI_FILE_NULL) 527 ureg_UMOD(ureg, r, a, b); 528 break; 529 } 530 default: 531 ok = false; 532 } 533 } 534 535 if(!ok && avoid_int) 536 { 537 ok = true; 538 switch(insn->opcode) 539 { 540 case SM4_OPCODE_ITOF: 541 case SM4_OPCODE_UTOF: 542 break; 543 OP1_(FTOI, TRUNC); 544 OP1_(FTOU, FLR); 545 // integer 546 OP2_(IADD, ADD); 547 OP2_(IMUL, MUL); 548 OP3_(IMAD, MAD); 549 OP2_(MIN, MIN); 550 OP2_(MAX, MAX); 551 OP2_(ILT, SLT); 552 OP2_(IGE, SGE); 553 OP2_(IEQ, SEQ); 554 OP2_(INE, SNE); 555 556 // unsigned 557 OP2_(UMUL, MUL); 558 OP3_(UMAD, MAD); 559 OP2_(UMIN, MIN); 560 OP2_(UMAX, MAX); 561 OP2_(ULT, SLT); 562 OP2_(UGE, SGE); 563 564 case SM4_OPCODE_INEG: 565 ureg_MOV(ureg, _dst(), ureg_negate(_src(1))); 566 break; 567 case SM4_OPCODE_ISHL: 568 { 569 struct ureg_dst d = _dst(); 570 struct ureg_dst t = _tmp(d); 571 ureg_EX2(ureg, t, _src(2)); 572 ureg_MUL(ureg, d, ureg_src(t), _src(1)); 573 break; 574 } 575 case SM4_OPCODE_ISHR: 576 case SM4_OPCODE_USHR: 577 { 578 struct ureg_dst d = _dst(); 579 struct ureg_dst t = _tmp(d); 580 ureg_EX2(ureg, t, ureg_negate(_src(2))); 581 ureg_MUL(ureg, t, ureg_src(t), _src(1)); 582 ureg_FLR(ureg, d, ureg_src(t)); 583 break; 584 } 585 case SM4_OPCODE_UDIV: 586 { 587 struct ureg_dst q = _dst(0); 588 struct ureg_dst r = _dst(1); 589 struct ureg_src a = _src(2); 590 struct ureg_src b = _src(3); 591 struct ureg_dst f = _tmp(); 592 ureg_DIV(ureg, f, a, b); 593 if(q.File != TGSI_FILE_NULL) 594 ureg_FLR(ureg, q, ureg_src(f)); 595 if(r.File != TGSI_FILE_NULL) 596 { 597 ureg_FRC(ureg, f, ureg_src(f)); 598 ureg_MUL(ureg, r, ureg_src(f), b); 599 } 600 break; 601 } 602 default: 603 ok = false; 604 } 605 } 606 607 check(ok); 608 609 if(!insn_tmps.empty()) 610 { 611 for(unsigned i = 0; i < insn_tmps.size(); ++i) 612 ureg_release_temporary(ureg, insn_tmps[i]); 613 insn_tmps.clear(); 614 } 615 next:; 616 } 617 } 618 619 void* do_translate() 620 { 621 unsigned processor; 622 switch(program.version.type) 623 { 624 case 0: 625 processor = TGSI_PROCESSOR_FRAGMENT; 626 break; 627 case 1: 628 processor = TGSI_PROCESSOR_VERTEX; 629 break; 630 case 2: 631 processor = TGSI_PROCESSOR_GEOMETRY; 632 break; 633 default: 634 fail("Tessellation and compute shaders not yet supported"); 635 return 0; 636 } 637 638 if(!sm4_link_cf_insns(program)) 639 fail("Malformed control flow"); 640 if(!sm4_find_labels(program)) 641 fail("Failed to locate labels"); 642 643 ureg = ureg_create(processor); 644 645 in_sub = false; 646 647 sm4_to_tgsi_insn_num.resize(program.insns.size()); 648 for(unsigned insn_num = 0; insn_num < program.dcls.size(); ++insn_num) 649 { 650 sm4_dcl& dcl = *program.dcls[insn_num]; 651 int idx = -1; 652 if(dcl.op.get() && dcl.op->is_index_simple(0)) 653 idx = dcl.op->indices[0].disp; 654 switch(dcl.opcode) 655 { 656 case SM4_OPCODE_DCL_GLOBAL_FLAGS: 657 break; 658 case SM4_OPCODE_DCL_TEMPS: 659 for(unsigned i = 0; i < dcl.num; ++i) 660 temps.push_back(ureg_DECL_temporary(ureg)); 661 break; 662 case SM4_OPCODE_DCL_INPUT: 663 check(idx >= 0); 664 if(processor == TGSI_PROCESSOR_VERTEX) 665 { 666 if(inputs.size() <= (unsigned)idx) 667 inputs.resize(idx + 1); 668 inputs[idx] = ureg_DECL_vs_input(ureg, idx); 669 } 670 else if(processor == TGSI_PROCESSOR_GEOMETRY) 671 { 672 // TODO: is this correct? 673 unsigned gsidx = dcl.op->indices[1].disp; 674 if(inputs.size() <= (unsigned)gsidx) 675 inputs.resize(gsidx + 1); 676 inputs[gsidx] = ureg_DECL_gs_input(ureg, gsidx, TGSI_SEMANTIC_GENERIC, gsidx); 677 } 678 else 679 check(0); 680 break; 681 case SM4_OPCODE_DCL_INPUT_PS: 682 check(idx >= 0); 683 if(inputs.size() <= (unsigned)idx) 684 inputs.resize(idx + 1); 685 inputs[idx] = ureg_DECL_fs_input_cyl_centroid(ureg, TGSI_SEMANTIC_GENERIC, idx, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].interpolation, 0, sm4_to_pipe_interpolation[dcl.dcl_input_ps.interpolation].centroid); 686 break; 687 case SM4_OPCODE_DCL_OUTPUT: 688 check(idx >= 0); 689 if(outputs.size() <= (unsigned)idx) 690 outputs.resize(idx + 1); 691 if(processor == TGSI_PROCESSOR_FRAGMENT) 692 outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, idx); 693 else 694 outputs[idx] = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, idx); 695 break; 696 case SM4_OPCODE_DCL_INPUT_SIV: 697 case SM4_OPCODE_DCL_INPUT_SGV: 698 case SM4_OPCODE_DCL_INPUT_PS_SIV: 699 case SM4_OPCODE_DCL_INPUT_PS_SGV: 700 check(idx >= 0); 701 if(inputs.size() <= (unsigned)idx) 702 inputs.resize(idx + 1); 703 // TODO: is this correct? 704 inputs[idx] = ureg_DECL_system_value(ureg, idx, sm4_to_pipe_sv[dcl.sv], 0); 705 break; 706 case SM4_OPCODE_DCL_OUTPUT_SIV: 707 case SM4_OPCODE_DCL_OUTPUT_SGV: 708 check(idx >= 0); 709 if(outputs.size() <= (unsigned)idx) 710 outputs.resize(idx + 1); 711 check(sm4_to_pipe_sv[dcl.sv] >= 0); 712 outputs[idx] = ureg_DECL_output(ureg, sm4_to_pipe_sv[dcl.sv], 0); 713 break; 714 case SM4_OPCODE_DCL_RESOURCE: 715 check(idx >= 0); 716 if(targets.size() <= (unsigned)idx) 717 targets.resize(idx + 1); 718 switch(dcl.dcl_resource.target) 719 { 720 case SM4_TARGET_TEXTURE1D: 721 targets[idx].first = TGSI_TEXTURE_1D; 722 targets[idx].second = TGSI_TEXTURE_SHADOW1D; 723 break; 724 case SM4_TARGET_TEXTURE1DARRAY: 725 targets[idx].first = TGSI_TEXTURE_1D_ARRAY; 726 targets[idx].second = TGSI_TEXTURE_SHADOW1D_ARRAY; 727 break; 728 case SM4_TARGET_TEXTURE2D: 729 targets[idx].first = TGSI_TEXTURE_2D; 730 targets[idx].second = TGSI_TEXTURE_SHADOW2D; 731 break; 732 case SM4_TARGET_TEXTURE2DARRAY: 733 targets[idx].first = TGSI_TEXTURE_2D_ARRAY; 734 targets[idx].second = TGSI_TEXTURE_SHADOW2D_ARRAY; 735 break; 736 case SM4_TARGET_TEXTURE3D: 737 targets[idx].first = TGSI_TEXTURE_3D; 738 targets[idx].second = 0; 739 break; 740 case SM4_TARGET_TEXTURECUBE: 741 targets[idx].first = TGSI_TEXTURE_CUBE; 742 targets[idx].second = 0; 743 break; 744 default: 745 // HACK to make SimpleSample10 work 746 //check(0); 747 targets[idx].first = TGSI_TEXTURE_2D; 748 targets[idx].second = TGSI_TEXTURE_SHADOW2D; 749 break; 750 } 751 if(resources.size() <= (unsigned)idx) 752 resources.resize(idx + 1); 753 resources[idx] = ureg_DECL_sampler_view( 754 ureg, idx, targets[idx].first, 755 res_return_type(dcl.rrt.x), 756 res_return_type(dcl.rrt.y), 757 res_return_type(dcl.rrt.z), 758 res_return_type(dcl.rrt.w)); 759 break; 760 case SM4_OPCODE_DCL_SAMPLER: 761 check(idx >= 0); 762 if(sampler_modes.size() <= (unsigned)idx) 763 sampler_modes.resize(idx + 1); 764 check(!dcl.dcl_sampler.mono); 765 sampler_modes[idx] = dcl.dcl_sampler.shadow; 766 if(samplers.size() <= (unsigned)idx) 767 samplers.resize(idx + 1); 768 samplers[idx] = ureg_DECL_sampler(ureg, idx); 769 break; 770 case SM4_OPCODE_DCL_CONSTANT_BUFFER: 771 check(dcl.op->num_indices == 2); 772 check(dcl.op->is_index_simple(0)); 773 check(dcl.op->is_index_simple(1)); 774 idx = dcl.op->indices[0].disp; 775 ureg_DECL_constant2D(ureg, 0, (unsigned)dcl.op->indices[1].disp - 1, idx); 776 break; 777 case SM4_OPCODE_DCL_GS_INPUT_PRIMITIVE: 778 ureg_property_gs_input_prim(ureg, d3d_to_pipe_prim_type[dcl.dcl_gs_input_primitive.primitive]); 779 break; 780 case SM4_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: 781 ureg_property_gs_output_prim(ureg, d3d_to_pipe_prim[dcl.dcl_gs_output_primitive_topology.primitive_topology]); 782 break; 783 case SM4_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: 784 ureg_property_gs_max_vertices(ureg, dcl.num); 785 break; 786 default: 787 check(0); 788 } 789 } 790 791 translate_insns(0, program.insns.size()); 792 sm4_to_tgsi_insn_num.push_back(ureg_get_instruction_number(ureg)); 793 if(in_sub) 794 ureg_ENDSUB(ureg); 795 else 796 ureg_END(ureg); 797 798 for(unsigned i = 0; i < label_to_sm4_insn_num.size(); ++i) 799 ureg_fixup_label(ureg, label_to_sm4_insn_num[i].first, sm4_to_tgsi_insn_num[label_to_sm4_insn_num[i].second]); 800 801 const struct tgsi_token * tokens = ureg_get_tokens(ureg, 0); 802 ureg_destroy(ureg); 803 return (void*)tokens; 804 } 805 806 void* translate() 807 { 808 try 809 { 810 return do_translate(); 811 } 812 catch(const char*) 813 { 814 return 0; 815 } 816 } 817 }; 818 819 void* sm4_to_tgsi(struct sm4_program& program) 820 { 821 sm4_to_tgsi_converter conv(program); 822 return conv.translate(); 823 } 824 825 void* sm4_to_tgsi_linkage_only(struct sm4_program& prog) 826 { 827 struct ureg_program* ureg = ureg_create(TGSI_PROCESSOR_GEOMETRY); 828 829 uint64_t already = 0; 830 for(unsigned n = 0, i = 0; i < prog.num_params_out; ++i) 831 { 832 unsigned sn, si; 833 834 if(already & (1ULL << prog.params_out[i].Register)) 835 continue; 836 already |= 1ULL << prog.params_out[i].Register; 837 838 switch(prog.params_out[i].SystemValueType) 839 { 840 case D3D_NAME_UNDEFINED: 841 sn = TGSI_SEMANTIC_GENERIC; 842 si = n++; 843 break; 844 case D3D_NAME_CULL_DISTANCE: 845 case D3D_NAME_CLIP_DISTANCE: 846 // FIXME 847 sn = 0; 848 si = prog.params_out[i].SemanticIndex; 849 assert(0); 850 break; 851 default: 852 continue; 853 } 854 855 ureg_DECL_output(ureg, sn, si); 856 } 857 858 const struct tgsi_token* tokens = ureg_get_tokens(ureg, 0); 859 ureg_destroy(ureg); 860 return (void*)tokens; 861 } 862