1 /* 2 * Copyright 2011 Joakim Sindholt <opensource (at) zhasha.com> 3 * Copyright 2013 Christoph Bumiller 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 23 24 #include "nine_shader.h" 25 26 #include "device9.h" 27 #include "nine_debug.h" 28 #include "nine_state.h" 29 #include "vertexdeclaration9.h" 30 31 #include "util/macros.h" 32 #include "util/u_memory.h" 33 #include "util/u_inlines.h" 34 #include "pipe/p_shader_tokens.h" 35 #include "tgsi/tgsi_ureg.h" 36 #include "tgsi/tgsi_dump.h" 37 38 #define DBG_CHANNEL DBG_SHADER 39 40 #define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args) 41 42 43 struct shader_translator; 44 45 typedef HRESULT (*translate_instruction_func)(struct shader_translator *); 46 47 static inline const char *d3dsio_to_string(unsigned opcode); 48 49 50 #define NINED3D_SM1_VS 0xfffe 51 #define NINED3D_SM1_PS 0xffff 52 53 #define NINE_MAX_COND_DEPTH 64 54 #define NINE_MAX_LOOP_DEPTH 64 55 56 #define NINED3DSP_END 0x0000ffff 57 58 #define NINED3DSPTYPE_FLOAT4 0 59 #define NINED3DSPTYPE_INT4 1 60 #define NINED3DSPTYPE_BOOL 2 61 62 #define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1) 63 64 #define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL 65 #define NINED3DSP_WRITEMASK_SHIFT 16 66 67 #define NINED3DSHADER_INST_PREDICATED (1 << 28) 68 69 #define NINED3DSHADER_REL_OP_GT 1 70 #define NINED3DSHADER_REL_OP_EQ 2 71 #define NINED3DSHADER_REL_OP_GE 3 72 #define NINED3DSHADER_REL_OP_LT 4 73 #define NINED3DSHADER_REL_OP_NE 5 74 #define NINED3DSHADER_REL_OP_LE 6 75 76 #define NINED3DSIO_OPCODE_FLAGS_SHIFT 16 77 #define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT) 78 79 #define NINED3DSI_TEXLD_PROJECT 0x1 80 #define NINED3DSI_TEXLD_BIAS 0x2 81 82 #define NINED3DSP_WRITEMASK_0 0x1 83 #define NINED3DSP_WRITEMASK_1 0x2 84 #define NINED3DSP_WRITEMASK_2 0x4 85 #define NINED3DSP_WRITEMASK_3 0x8 86 #define NINED3DSP_WRITEMASK_ALL 0xf 87 88 #define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6)) 89 90 #define NINE_SWIZZLE4(x,y,z,w) \ 91 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w 92 93 #define NINE_CONSTANT_SRC(index) \ 94 ureg_src_register(TGSI_FILE_CONSTANT, index) 95 96 #define NINE_APPLY_SWIZZLE(src, s) \ 97 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s)) 98 99 #define NINE_CONSTANT_SRC_SWIZZLE(index, s) \ 100 NINE_APPLY_SWIZZLE(NINE_CONSTANT_SRC(index), s) 101 102 #define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT) 103 #define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT) 104 #define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT) 105 106 /* 107 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4 108 * BIAS <= PS 1.4 (x-0.5) 109 * BIASNEG <= PS 1.4 (-(x-0.5)) 110 * SIGN <= PS 1.4 (2(x-0.5)) 111 * SIGNNEG <= PS 1.4 (-2(x-0.5)) 112 * COMP <= PS 1.4 (1-x) 113 * X2 = PS 1.4 (2x) 114 * X2NEG = PS 1.4 (-2x) 115 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11 116 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11 117 * ABS >= SM 3.0 (abs(x)) 118 * ABSNEG >= SM 3.0 (-abs(x)) 119 * NOT >= SM 2.0 pedication only 120 */ 121 #define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT) 122 #define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT) 123 #define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT) 124 #define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT) 125 #define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT) 126 #define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT) 127 #define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT) 128 #define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT) 129 #define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT) 130 #define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT) 131 #define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT) 132 #define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT) 133 #define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT) 134 #define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT) 135 136 static const char *sm1_mod_str[] = 137 { 138 [NINED3DSPSM_NONE] = "", 139 [NINED3DSPSM_NEG] = "-", 140 [NINED3DSPSM_BIAS] = "bias", 141 [NINED3DSPSM_BIASNEG] = "biasneg", 142 [NINED3DSPSM_SIGN] = "sign", 143 [NINED3DSPSM_SIGNNEG] = "signneg", 144 [NINED3DSPSM_COMP] = "comp", 145 [NINED3DSPSM_X2] = "x2", 146 [NINED3DSPSM_X2NEG] = "x2neg", 147 [NINED3DSPSM_DZ] = "dz", 148 [NINED3DSPSM_DW] = "dw", 149 [NINED3DSPSM_ABS] = "abs", 150 [NINED3DSPSM_ABSNEG] = "-abs", 151 [NINED3DSPSM_NOT] = "not" 152 }; 153 154 static void 155 sm1_dump_writemask(BYTE mask) 156 { 157 if (mask & 1) DUMP("x"); else DUMP("_"); 158 if (mask & 2) DUMP("y"); else DUMP("_"); 159 if (mask & 4) DUMP("z"); else DUMP("_"); 160 if (mask & 8) DUMP("w"); else DUMP("_"); 161 } 162 163 static void 164 sm1_dump_swizzle(BYTE s) 165 { 166 char c[4] = { 'x', 'y', 'z', 'w' }; 167 DUMP("%c%c%c%c", 168 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]); 169 } 170 171 static const char sm1_file_char[] = 172 { 173 [D3DSPR_TEMP] = 'r', 174 [D3DSPR_INPUT] = 'v', 175 [D3DSPR_CONST] = 'c', 176 [D3DSPR_ADDR] = 'A', 177 [D3DSPR_RASTOUT] = 'R', 178 [D3DSPR_ATTROUT] = 'D', 179 [D3DSPR_OUTPUT] = 'o', 180 [D3DSPR_CONSTINT] = 'I', 181 [D3DSPR_COLOROUT] = 'C', 182 [D3DSPR_DEPTHOUT] = 'D', 183 [D3DSPR_SAMPLER] = 's', 184 [D3DSPR_CONST2] = 'c', 185 [D3DSPR_CONST3] = 'c', 186 [D3DSPR_CONST4] = 'c', 187 [D3DSPR_CONSTBOOL] = 'B', 188 [D3DSPR_LOOP] = 'L', 189 [D3DSPR_TEMPFLOAT16] = 'h', 190 [D3DSPR_MISCTYPE] = 'M', 191 [D3DSPR_LABEL] = 'X', 192 [D3DSPR_PREDICATE] = 'p' 193 }; 194 195 static void 196 sm1_dump_reg(BYTE file, INT index) 197 { 198 switch (file) { 199 case D3DSPR_LOOP: 200 DUMP("aL"); 201 break; 202 case D3DSPR_COLOROUT: 203 DUMP("oC%i", index); 204 break; 205 case D3DSPR_DEPTHOUT: 206 DUMP("oDepth"); 207 break; 208 case D3DSPR_RASTOUT: 209 DUMP("oRast%i", index); 210 break; 211 case D3DSPR_CONSTINT: 212 DUMP("iconst[%i]", index); 213 break; 214 case D3DSPR_CONSTBOOL: 215 DUMP("bconst[%i]", index); 216 break; 217 default: 218 DUMP("%c%i", sm1_file_char[file], index); 219 break; 220 } 221 } 222 223 struct sm1_src_param 224 { 225 INT idx; 226 struct sm1_src_param *rel; 227 BYTE file; 228 BYTE swizzle; 229 BYTE mod; 230 BYTE type; 231 union { 232 DWORD d[4]; 233 float f[4]; 234 int i[4]; 235 BOOL b; 236 } imm; 237 }; 238 static void 239 sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *); 240 241 struct sm1_dst_param 242 { 243 INT idx; 244 struct sm1_src_param *rel; 245 BYTE file; 246 BYTE mask; 247 BYTE mod; 248 int8_t shift; /* sint4 */ 249 BYTE type; 250 }; 251 252 static inline void 253 assert_replicate_swizzle(const struct ureg_src *reg) 254 { 255 assert(reg->SwizzleY == reg->SwizzleX && 256 reg->SwizzleZ == reg->SwizzleX && 257 reg->SwizzleW == reg->SwizzleX); 258 } 259 260 static void 261 sm1_dump_immediate(const struct sm1_src_param *param) 262 { 263 switch (param->type) { 264 case NINED3DSPTYPE_FLOAT4: 265 DUMP("{ %f %f %f %f }", 266 param->imm.f[0], param->imm.f[1], 267 param->imm.f[2], param->imm.f[3]); 268 break; 269 case NINED3DSPTYPE_INT4: 270 DUMP("{ %i %i %i %i }", 271 param->imm.i[0], param->imm.i[1], 272 param->imm.i[2], param->imm.i[3]); 273 break; 274 case NINED3DSPTYPE_BOOL: 275 DUMP("%s", param->imm.b ? "TRUE" : "FALSE"); 276 break; 277 default: 278 assert(0); 279 break; 280 } 281 } 282 283 static void 284 sm1_dump_src_param(const struct sm1_src_param *param) 285 { 286 if (param->file == NINED3DSPR_IMMEDIATE) { 287 assert(!param->mod && 288 !param->rel && 289 param->swizzle == NINED3DSP_NOSWIZZLE); 290 sm1_dump_immediate(param); 291 return; 292 } 293 294 if (param->mod) 295 DUMP("%s(", sm1_mod_str[param->mod]); 296 if (param->rel) { 297 DUMP("%c[", sm1_file_char[param->file]); 298 sm1_dump_src_param(param->rel); 299 DUMP("+%i]", param->idx); 300 } else { 301 sm1_dump_reg(param->file, param->idx); 302 } 303 if (param->mod) 304 DUMP(")"); 305 if (param->swizzle != NINED3DSP_NOSWIZZLE) { 306 DUMP("."); 307 sm1_dump_swizzle(param->swizzle); 308 } 309 } 310 311 static void 312 sm1_dump_dst_param(const struct sm1_dst_param *param) 313 { 314 if (param->mod & NINED3DSPDM_SATURATE) 315 DUMP("sat "); 316 if (param->mod & NINED3DSPDM_PARTIALP) 317 DUMP("pp "); 318 if (param->mod & NINED3DSPDM_CENTROID) 319 DUMP("centroid "); 320 if (param->shift < 0) 321 DUMP("/%u ", 1 << -param->shift); 322 if (param->shift > 0) 323 DUMP("*%u ", 1 << param->shift); 324 325 if (param->rel) { 326 DUMP("%c[", sm1_file_char[param->file]); 327 sm1_dump_src_param(param->rel); 328 DUMP("+%i]", param->idx); 329 } else { 330 sm1_dump_reg(param->file, param->idx); 331 } 332 if (param->mask != NINED3DSP_WRITEMASK_ALL) { 333 DUMP("."); 334 sm1_dump_writemask(param->mask); 335 } 336 } 337 338 struct sm1_semantic 339 { 340 struct sm1_dst_param reg; 341 BYTE sampler_type; 342 D3DDECLUSAGE usage; 343 BYTE usage_idx; 344 }; 345 346 struct sm1_op_info 347 { 348 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter 349 * should be ignored completely */ 350 unsigned sio; 351 unsigned opcode; /* TGSI_OPCODE_x */ 352 353 /* versions are still set even handler is set */ 354 struct { 355 unsigned min; 356 unsigned max; 357 } vert_version, frag_version; 358 359 /* number of regs parsed outside of special handler */ 360 unsigned ndst; 361 unsigned nsrc; 362 363 /* some instructions don't map perfectly, so use a special handler */ 364 translate_instruction_func handler; 365 }; 366 367 struct sm1_instruction 368 { 369 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; 370 BYTE flags; 371 BOOL coissue; 372 BOOL predicated; 373 BYTE ndst; 374 BYTE nsrc; 375 struct sm1_src_param src[4]; 376 struct sm1_src_param src_rel[4]; 377 struct sm1_src_param pred; 378 struct sm1_src_param dst_rel[1]; 379 struct sm1_dst_param dst[1]; 380 381 struct sm1_op_info *info; 382 }; 383 384 static void 385 sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent) 386 { 387 unsigned i; 388 389 /* no info stored for these: */ 390 if (insn->opcode == D3DSIO_DCL) 391 return; 392 for (i = 0; i < indent; ++i) 393 DUMP(" "); 394 395 if (insn->predicated) { 396 DUMP("@"); 397 sm1_dump_src_param(&insn->pred); 398 DUMP(" "); 399 } 400 DUMP("%s", d3dsio_to_string(insn->opcode)); 401 if (insn->flags) { 402 switch (insn->opcode) { 403 case D3DSIO_TEX: 404 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b"); 405 break; 406 default: 407 DUMP("_%x", insn->flags); 408 break; 409 } 410 } 411 if (insn->coissue) 412 DUMP("_co"); 413 DUMP(" "); 414 415 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) { 416 sm1_dump_dst_param(&insn->dst[i]); 417 DUMP(" "); 418 } 419 420 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) { 421 sm1_dump_src_param(&insn->src[i]); 422 DUMP(" "); 423 } 424 if (insn->opcode == D3DSIO_DEF || 425 insn->opcode == D3DSIO_DEFI || 426 insn->opcode == D3DSIO_DEFB) 427 sm1_dump_immediate(&insn->src[0]); 428 429 DUMP("\n"); 430 } 431 432 struct sm1_local_const 433 { 434 INT idx; 435 struct ureg_src reg; 436 float f[4]; /* for indirect addressing of float constants */ 437 }; 438 439 struct shader_translator 440 { 441 const DWORD *byte_code; 442 const DWORD *parse; 443 const DWORD *parse_next; 444 445 struct ureg_program *ureg; 446 447 /* shader version */ 448 struct { 449 BYTE major; 450 BYTE minor; 451 } version; 452 unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */ 453 unsigned num_constf_allowed; 454 unsigned num_consti_allowed; 455 unsigned num_constb_allowed; 456 457 boolean native_integers; 458 boolean inline_subroutines; 459 boolean lower_preds; 460 boolean want_texcoord; 461 boolean shift_wpos; 462 boolean wpos_is_sysval; 463 boolean face_is_sysval_integer; 464 unsigned texcoord_sn; 465 466 struct sm1_instruction insn; /* current instruction */ 467 468 struct { 469 struct ureg_dst *r; 470 struct ureg_dst oPos; 471 struct ureg_dst oPos_out; /* the real output when doing streamout */ 472 struct ureg_dst oFog; 473 struct ureg_dst oPts; 474 struct ureg_dst oCol[4]; 475 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS]; 476 struct ureg_dst oDepth; 477 struct ureg_src v[PIPE_MAX_SHADER_INPUTS]; 478 struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */ 479 struct ureg_src vPos; 480 struct ureg_src vFace; 481 struct ureg_src s; 482 struct ureg_dst p; 483 struct ureg_dst address; 484 struct ureg_dst a0; 485 struct ureg_dst tS[8]; /* texture stage registers */ 486 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */ 487 struct ureg_dst t[5]; /* scratch TEMPs */ 488 struct ureg_src vC[2]; /* PS color in */ 489 struct ureg_src vT[8]; /* PS texcoord in */ 490 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */ 491 } regs; 492 unsigned num_temp; /* ARRAY_SIZE(regs.r) */ 493 unsigned num_scratch; 494 unsigned loop_depth; 495 unsigned loop_depth_max; 496 unsigned cond_depth; 497 unsigned loop_labels[NINE_MAX_LOOP_DEPTH]; 498 unsigned cond_labels[NINE_MAX_COND_DEPTH]; 499 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */ 500 501 unsigned *inst_labels; /* LABEL op */ 502 unsigned num_inst_labels; 503 504 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */ 505 506 struct sm1_local_const *lconstf; 507 unsigned num_lconstf; 508 struct sm1_local_const *lconsti; 509 unsigned num_lconsti; 510 struct sm1_local_const *lconstb; 511 unsigned num_lconstb; 512 513 boolean indirect_const_access; 514 boolean failure; 515 516 struct nine_vs_output_info output_info[16]; 517 int num_outputs; 518 519 struct nine_shader_info *info; 520 521 int16_t op_info_map[D3DSIO_BREAKP + 1]; 522 }; 523 524 #define IS_VS (tx->processor == PIPE_SHADER_VERTEX) 525 #define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT) 526 527 #define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;} 528 529 static void 530 sm1_read_semantic(struct shader_translator *, struct sm1_semantic *); 531 532 static void 533 sm1_instruction_check(const struct sm1_instruction *insn) 534 { 535 if (insn->opcode == D3DSIO_CRS) 536 { 537 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3) 538 { 539 DBG("CRS.mask.w\n"); 540 } 541 } 542 } 543 544 static void 545 nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex, 546 int mask, int output_index) 547 { 548 tx->output_info[tx->num_outputs].output_semantic = Usage; 549 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex; 550 tx->output_info[tx->num_outputs].mask = mask; 551 tx->output_info[tx->num_outputs].output_index = output_index; 552 tx->num_outputs++; 553 } 554 555 static boolean 556 tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) 557 { 558 INT i; 559 560 if (index < 0 || index >= tx->num_constf_allowed) { 561 tx->failure = TRUE; 562 return FALSE; 563 } 564 for (i = 0; i < tx->num_lconstf; ++i) { 565 if (tx->lconstf[i].idx == index) { 566 *src = tx->lconstf[i].reg; 567 return TRUE; 568 } 569 } 570 return FALSE; 571 } 572 static boolean 573 tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) 574 { 575 int i; 576 577 if (index < 0 || index >= tx->num_consti_allowed) { 578 tx->failure = TRUE; 579 return FALSE; 580 } 581 for (i = 0; i < tx->num_lconsti; ++i) { 582 if (tx->lconsti[i].idx == index) { 583 *src = tx->lconsti[i].reg; 584 return TRUE; 585 } 586 } 587 return FALSE; 588 } 589 static boolean 590 tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index) 591 { 592 int i; 593 594 if (index < 0 || index >= tx->num_constb_allowed) { 595 tx->failure = TRUE; 596 return FALSE; 597 } 598 for (i = 0; i < tx->num_lconstb; ++i) { 599 if (tx->lconstb[i].idx == index) { 600 *src = tx->lconstb[i].reg; 601 return TRUE; 602 } 603 } 604 return FALSE; 605 } 606 607 static void 608 tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) 609 { 610 unsigned n; 611 612 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed) 613 614 for (n = 0; n < tx->num_lconstf; ++n) 615 if (tx->lconstf[n].idx == index) 616 break; 617 if (n == tx->num_lconstf) { 618 if ((n % 8) == 0) { 619 tx->lconstf = REALLOC(tx->lconstf, 620 (n + 0) * sizeof(tx->lconstf[0]), 621 (n + 8) * sizeof(tx->lconstf[0])); 622 assert(tx->lconstf); 623 } 624 tx->num_lconstf++; 625 } 626 tx->lconstf[n].idx = index; 627 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]); 628 629 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f)); 630 } 631 static void 632 tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) 633 { 634 unsigned n; 635 636 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed) 637 638 for (n = 0; n < tx->num_lconsti; ++n) 639 if (tx->lconsti[n].idx == index) 640 break; 641 if (n == tx->num_lconsti) { 642 if ((n % 8) == 0) { 643 tx->lconsti = REALLOC(tx->lconsti, 644 (n + 0) * sizeof(tx->lconsti[0]), 645 (n + 8) * sizeof(tx->lconsti[0])); 646 assert(tx->lconsti); 647 } 648 tx->num_lconsti++; 649 } 650 651 tx->lconsti[n].idx = index; 652 tx->lconsti[n].reg = tx->native_integers ? 653 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) : 654 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]); 655 } 656 static void 657 tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b) 658 { 659 unsigned n; 660 661 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed) 662 663 for (n = 0; n < tx->num_lconstb; ++n) 664 if (tx->lconstb[n].idx == index) 665 break; 666 if (n == tx->num_lconstb) { 667 if ((n % 8) == 0) { 668 tx->lconstb = REALLOC(tx->lconstb, 669 (n + 0) * sizeof(tx->lconstb[0]), 670 (n + 8) * sizeof(tx->lconstb[0])); 671 assert(tx->lconstb); 672 } 673 tx->num_lconstb++; 674 } 675 676 tx->lconstb[n].idx = index; 677 tx->lconstb[n].reg = tx->native_integers ? 678 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) : 679 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f); 680 } 681 682 static inline struct ureg_dst 683 tx_scratch(struct shader_translator *tx) 684 { 685 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) { 686 tx->failure = TRUE; 687 return tx->regs.t[0]; 688 } 689 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch])) 690 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg); 691 return tx->regs.t[tx->num_scratch++]; 692 } 693 694 static inline struct ureg_dst 695 tx_scratch_scalar(struct shader_translator *tx) 696 { 697 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 698 } 699 700 static inline struct ureg_src 701 tx_src_scalar(struct ureg_dst dst) 702 { 703 struct ureg_src src = ureg_src(dst); 704 int c = ffs(dst.WriteMask) - 1; 705 if (dst.WriteMask == (1 << c)) 706 src = ureg_scalar(src, c); 707 return src; 708 } 709 710 static inline void 711 tx_temp_alloc(struct shader_translator *tx, INT idx) 712 { 713 assert(idx >= 0); 714 if (idx >= tx->num_temp) { 715 unsigned k = tx->num_temp; 716 unsigned n = idx + 1; 717 tx->regs.r = REALLOC(tx->regs.r, 718 k * sizeof(tx->regs.r[0]), 719 n * sizeof(tx->regs.r[0])); 720 for (; k < n; ++k) 721 tx->regs.r[k] = ureg_dst_undef(); 722 tx->num_temp = n; 723 } 724 if (ureg_dst_is_undef(tx->regs.r[idx])) 725 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg); 726 } 727 728 static inline void 729 tx_addr_alloc(struct shader_translator *tx, INT idx) 730 { 731 assert(idx == 0); 732 if (ureg_dst_is_undef(tx->regs.address)) 733 tx->regs.address = ureg_DECL_address(tx->ureg); 734 if (ureg_dst_is_undef(tx->regs.a0)) 735 tx->regs.a0 = ureg_DECL_temporary(tx->ureg); 736 } 737 738 static inline void 739 tx_pred_alloc(struct shader_translator *tx, INT idx) 740 { 741 assert(idx == 0); 742 if (ureg_dst_is_undef(tx->regs.p)) 743 tx->regs.p = ureg_DECL_predicate(tx->ureg); 744 } 745 746 /* NOTE: It's not very clear on which ps1.1-ps1.3 instructions 747 * the projection should be applied on the texture. It doesn't 748 * apply on texkill. 749 * The doc is very imprecise here (it says the projection is done 750 * before rasterization, thus in vs, which seems wrong since ps instructions 751 * are affected differently) 752 * For now we only apply to the ps TEX instruction and TEXBEM. 753 * Perhaps some other instructions would need it */ 754 static inline void 755 apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, 756 struct ureg_src src, INT idx) 757 { 758 struct ureg_dst tmp; 759 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); 760 761 /* no projection */ 762 if (dim == 1) { 763 ureg_MOV(tx->ureg, dst, src); 764 } else { 765 tmp = tx_scratch_scalar(tx); 766 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1)); 767 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src); 768 } 769 } 770 771 static inline void 772 TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, 773 unsigned target, struct ureg_src src0, 774 struct ureg_src src1, INT idx) 775 { 776 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); 777 struct ureg_dst tmp; 778 779 /* dim == 1: no projection 780 * Looks like must be disabled when it makes no 781 * sense according the texture dimensions 782 */ 783 if (dim == 1 || dim <= target) { 784 ureg_TEX(tx->ureg, dst, target, src0, src1); 785 } else if (dim == 4) { 786 ureg_TXP(tx->ureg, dst, target, src0, src1); 787 } else { 788 tmp = tx_scratch(tx); 789 apply_ps1x_projection(tx, tmp, src0, idx); 790 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1); 791 } 792 } 793 794 static inline void 795 tx_texcoord_alloc(struct shader_translator *tx, INT idx) 796 { 797 assert(IS_PS); 798 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT)); 799 if (ureg_src_is_undef(tx->regs.vT[idx])) 800 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx, 801 TGSI_INTERPOLATE_PERSPECTIVE); 802 } 803 804 static inline unsigned * 805 tx_bgnloop(struct shader_translator *tx) 806 { 807 tx->loop_depth++; 808 if (tx->loop_depth_max < tx->loop_depth) 809 tx->loop_depth_max = tx->loop_depth; 810 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH); 811 return &tx->loop_labels[tx->loop_depth - 1]; 812 } 813 814 static inline unsigned * 815 tx_endloop(struct shader_translator *tx) 816 { 817 assert(tx->loop_depth); 818 tx->loop_depth--; 819 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth], 820 ureg_get_instruction_number(tx->ureg)); 821 return &tx->loop_labels[tx->loop_depth]; 822 } 823 824 static struct ureg_dst 825 tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep) 826 { 827 const unsigned l = tx->loop_depth - 1; 828 829 if (!tx->loop_depth) 830 { 831 DBG("loop counter requested outside of loop\n"); 832 return ureg_dst_undef(); 833 } 834 835 if (ureg_dst_is_undef(tx->regs.rL[l])) { 836 /* loop or rep ctr creation */ 837 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg); 838 tx->loop_or_rep[l] = loop_or_rep; 839 } 840 /* loop - rep - endloop - endrep not allowed */ 841 assert(tx->loop_or_rep[l] == loop_or_rep); 842 843 return tx->regs.rL[l]; 844 } 845 846 static struct ureg_src 847 tx_get_loopal(struct shader_translator *tx) 848 { 849 int loop_level = tx->loop_depth - 1; 850 851 while (loop_level >= 0) { 852 /* handle loop - rep - endrep - endloop case */ 853 if (tx->loop_or_rep[loop_level]) 854 /* the value is in the loop counter y component (nine implementation) */ 855 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y); 856 loop_level--; 857 } 858 859 DBG("aL counter requested outside of loop\n"); 860 return ureg_src_undef(); 861 } 862 863 static inline unsigned * 864 tx_cond(struct shader_translator *tx) 865 { 866 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH); 867 tx->cond_depth++; 868 return &tx->cond_labels[tx->cond_depth - 1]; 869 } 870 871 static inline unsigned * 872 tx_elsecond(struct shader_translator *tx) 873 { 874 assert(tx->cond_depth); 875 return &tx->cond_labels[tx->cond_depth - 1]; 876 } 877 878 static inline void 879 tx_endcond(struct shader_translator *tx) 880 { 881 assert(tx->cond_depth); 882 tx->cond_depth--; 883 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth], 884 ureg_get_instruction_number(tx->ureg)); 885 } 886 887 static inline struct ureg_dst 888 nine_ureg_dst_register(unsigned file, int index) 889 { 890 return ureg_dst(ureg_src_register(file, index)); 891 } 892 893 static inline struct ureg_src 894 nine_get_position_input(struct shader_translator *tx) 895 { 896 struct ureg_program *ureg = tx->ureg; 897 898 if (tx->wpos_is_sysval) 899 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 900 else 901 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 902 0, TGSI_INTERPOLATE_LINEAR); 903 } 904 905 static struct ureg_src 906 tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) 907 { 908 struct ureg_program *ureg = tx->ureg; 909 struct ureg_src src; 910 struct ureg_dst tmp; 911 912 switch (param->file) 913 { 914 case D3DSPR_TEMP: 915 assert(!param->rel); 916 tx_temp_alloc(tx, param->idx); 917 src = ureg_src(tx->regs.r[param->idx]); 918 break; 919 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ 920 case D3DSPR_ADDR: 921 assert(!param->rel); 922 if (IS_VS) { 923 assert(param->idx == 0); 924 /* the address register (vs only) must be 925 * assigned before use */ 926 assert(!ureg_dst_is_undef(tx->regs.a0)); 927 /* Round to lowest for vs1.1 (contrary to the doc), else 928 * round to nearest */ 929 if (tx->version.major < 2 && tx->version.minor < 2) 930 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0)); 931 else 932 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0)); 933 src = ureg_src(tx->regs.address); 934 } else { 935 if (tx->version.major < 2 && tx->version.minor < 4) { 936 /* no subroutines, so should be defined */ 937 src = ureg_src(tx->regs.tS[param->idx]); 938 } else { 939 tx_texcoord_alloc(tx, param->idx); 940 src = tx->regs.vT[param->idx]; 941 } 942 } 943 break; 944 case D3DSPR_INPUT: 945 if (IS_VS) { 946 src = ureg_src_register(TGSI_FILE_INPUT, param->idx); 947 } else { 948 if (tx->version.major < 3) { 949 assert(!param->rel); 950 src = ureg_DECL_fs_input_cyl_centroid( 951 ureg, TGSI_SEMANTIC_COLOR, param->idx, 952 TGSI_INTERPOLATE_COLOR, 0, 953 tx->info->force_color_in_centroid ? 954 TGSI_INTERPOLATE_LOC_CENTROID : 0, 955 0, 1); 956 } else { 957 if(param->rel) { 958 /* Copy all inputs (non consecutive) 959 * to temp array (consecutive). 960 * This is not good for performance. 961 * A better way would be to have inputs 962 * consecutive (would need implement alternative 963 * way to match vs outputs and ps inputs). 964 * However even with the better way, the temp array 965 * copy would need to be used if some inputs 966 * are not GENERIC or if they have different 967 * interpolation flag. */ 968 if (ureg_src_is_undef(tx->regs.v_consecutive)) { 969 int i; 970 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0)); 971 for (i = 0; i < 10; i++) { 972 if (!ureg_src_is_undef(tx->regs.v[i])) 973 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]); 974 else 975 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 976 } 977 } 978 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx); 979 } else { 980 assert(param->idx < ARRAY_SIZE(tx->regs.v)); 981 src = tx->regs.v[param->idx]; 982 } 983 } 984 } 985 break; 986 case D3DSPR_PREDICATE: 987 assert(!param->rel); 988 tx_pred_alloc(tx, param->idx); 989 src = ureg_src(tx->regs.p); 990 break; 991 case D3DSPR_SAMPLER: 992 assert(param->mod == NINED3DSPSM_NONE); 993 assert(param->swizzle == NINED3DSP_NOSWIZZLE); 994 assert(!param->rel); 995 src = ureg_src_register(TGSI_FILE_SAMPLER, param->idx); 996 break; 997 case D3DSPR_CONST: 998 assert(!param->rel || IS_VS); 999 if (param->rel) 1000 tx->indirect_const_access = TRUE; 1001 if (param->rel || !tx_lconstf(tx, &src, param->idx)) { 1002 if (!param->rel) 1003 nine_info_mark_const_f_used(tx->info, param->idx); 1004 /* vswp constant handling: we use two buffers 1005 * to fit all the float constants. The special handling 1006 * doesn't need to be elsewhere, because all the instructions 1007 * accessing the constants directly are VS1, and swvp 1008 * is VS >= 2 */ 1009 if (IS_VS && tx->info->swvp_on) { 1010 if (!param->rel) { 1011 if (param->idx < 4096) { 1012 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); 1013 src = ureg_src_dimension(src, 0); 1014 } else { 1015 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx - 4096); 1016 src = ureg_src_dimension(src, 1); 1017 } 1018 } else { 1019 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); /* TODO: swvp rel > 4096 */ 1020 src = ureg_src_dimension(src, 0); 1021 } 1022 } else 1023 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); 1024 } 1025 if (!IS_VS && tx->version.major < 2) { 1026 /* ps 1.X clamps constants */ 1027 tmp = tx_scratch(tx); 1028 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f)); 1029 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); 1030 src = ureg_src(tmp); 1031 } 1032 break; 1033 case D3DSPR_CONST2: 1034 case D3DSPR_CONST3: 1035 case D3DSPR_CONST4: 1036 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n"); 1037 assert(!"CONST2/3/4"); 1038 src = ureg_imm1f(ureg, 0.0f); 1039 break; 1040 case D3DSPR_CONSTINT: 1041 /* relative adressing only possible for float constants in vs */ 1042 assert(!param->rel); 1043 if (!tx_lconsti(tx, &src, param->idx)) { 1044 nine_info_mark_const_i_used(tx->info, param->idx); 1045 if (IS_VS && tx->info->swvp_on) { 1046 src = ureg_src_register(TGSI_FILE_CONSTANT, param->idx); 1047 src = ureg_src_dimension(src, 2); 1048 } else 1049 src = ureg_src_register(TGSI_FILE_CONSTANT, 1050 tx->info->const_i_base + param->idx); 1051 } 1052 break; 1053 case D3DSPR_CONSTBOOL: 1054 assert(!param->rel); 1055 if (!tx_lconstb(tx, &src, param->idx)) { 1056 char r = param->idx / 4; 1057 char s = param->idx & 3; 1058 nine_info_mark_const_b_used(tx->info, param->idx); 1059 if (IS_VS && tx->info->swvp_on) { 1060 src = ureg_src_register(TGSI_FILE_CONSTANT, r); 1061 src = ureg_src_dimension(src, 3); 1062 } else 1063 src = ureg_src_register(TGSI_FILE_CONSTANT, 1064 tx->info->const_b_base + r); 1065 src = ureg_swizzle(src, s, s, s, s); 1066 } 1067 break; 1068 case D3DSPR_LOOP: 1069 if (ureg_dst_is_undef(tx->regs.address)) 1070 tx->regs.address = ureg_DECL_address(ureg); 1071 if (!tx->native_integers) 1072 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx)); 1073 else 1074 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx)); 1075 src = ureg_src(tx->regs.address); 1076 break; 1077 case D3DSPR_MISCTYPE: 1078 switch (param->idx) { 1079 case D3DSMO_POSITION: 1080 if (ureg_src_is_undef(tx->regs.vPos)) 1081 tx->regs.vPos = nine_get_position_input(tx); 1082 if (tx->shift_wpos) { 1083 /* TODO: do this only once */ 1084 struct ureg_dst wpos = tx_scratch(tx); 1085 ureg_ADD(ureg, wpos, tx->regs.vPos, 1086 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f)); 1087 src = ureg_src(wpos); 1088 } else { 1089 src = tx->regs.vPos; 1090 } 1091 break; 1092 case D3DSMO_FACE: 1093 if (ureg_src_is_undef(tx->regs.vFace)) { 1094 if (tx->face_is_sysval_integer) { 1095 tmp = tx_scratch(tx); 1096 tx->regs.vFace = 1097 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0); 1098 1099 /* convert bool to float */ 1100 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X), 1101 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1)); 1102 tx->regs.vFace = ureg_src(tmp); 1103 } else { 1104 tx->regs.vFace = ureg_DECL_fs_input(ureg, 1105 TGSI_SEMANTIC_FACE, 0, 1106 TGSI_INTERPOLATE_CONSTANT); 1107 } 1108 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X); 1109 } 1110 src = tx->regs.vFace; 1111 break; 1112 default: 1113 assert(!"invalid src D3DSMO"); 1114 break; 1115 } 1116 assert(!param->rel); 1117 break; 1118 case D3DSPR_TEMPFLOAT16: 1119 break; 1120 default: 1121 assert(!"invalid src D3DSPR"); 1122 } 1123 if (param->rel) 1124 src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 1125 1126 switch (param->mod) { 1127 case NINED3DSPSM_DW: 1128 tmp = tx_scratch(tx); 1129 /* NOTE: app is not allowed to read w with this modifier */ 1130 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), src); 1131 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W))); 1132 src = ureg_src(tmp); 1133 break; 1134 case NINED3DSPSM_DZ: 1135 tmp = tx_scratch(tx); 1136 /* NOTE: app is not allowed to read z with this modifier */ 1137 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), src); 1138 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z))); 1139 src = ureg_src(tmp); 1140 break; 1141 default: 1142 break; 1143 } 1144 1145 if (param->swizzle != NINED3DSP_NOSWIZZLE) 1146 src = ureg_swizzle(src, 1147 (param->swizzle >> 0) & 0x3, 1148 (param->swizzle >> 2) & 0x3, 1149 (param->swizzle >> 4) & 0x3, 1150 (param->swizzle >> 6) & 0x3); 1151 1152 switch (param->mod) { 1153 case NINED3DSPSM_ABS: 1154 src = ureg_abs(src); 1155 break; 1156 case NINED3DSPSM_ABSNEG: 1157 src = ureg_negate(ureg_abs(src)); 1158 break; 1159 case NINED3DSPSM_NEG: 1160 src = ureg_negate(src); 1161 break; 1162 case NINED3DSPSM_BIAS: 1163 tmp = tx_scratch(tx); 1164 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f)); 1165 src = ureg_src(tmp); 1166 break; 1167 case NINED3DSPSM_BIASNEG: 1168 tmp = tx_scratch(tx); 1169 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src)); 1170 src = ureg_src(tmp); 1171 break; 1172 case NINED3DSPSM_NOT: 1173 if (tx->native_integers) { 1174 tmp = tx_scratch(tx); 1175 ureg_NOT(ureg, tmp, src); 1176 src = ureg_src(tmp); 1177 break; 1178 } 1179 /* fall through */ 1180 case NINED3DSPSM_COMP: 1181 tmp = tx_scratch(tx); 1182 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src)); 1183 src = ureg_src(tmp); 1184 break; 1185 case NINED3DSPSM_DZ: 1186 case NINED3DSPSM_DW: 1187 /* Already handled*/ 1188 break; 1189 case NINED3DSPSM_SIGN: 1190 tmp = tx_scratch(tx); 1191 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); 1192 src = ureg_src(tmp); 1193 break; 1194 case NINED3DSPSM_SIGNNEG: 1195 tmp = tx_scratch(tx); 1196 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f)); 1197 src = ureg_src(tmp); 1198 break; 1199 case NINED3DSPSM_X2: 1200 tmp = tx_scratch(tx); 1201 ureg_ADD(ureg, tmp, src, src); 1202 src = ureg_src(tmp); 1203 break; 1204 case NINED3DSPSM_X2NEG: 1205 tmp = tx_scratch(tx); 1206 ureg_ADD(ureg, tmp, src, src); 1207 src = ureg_negate(ureg_src(tmp)); 1208 break; 1209 default: 1210 assert(param->mod == NINED3DSPSM_NONE); 1211 break; 1212 } 1213 1214 return src; 1215 } 1216 1217 static struct ureg_dst 1218 _tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) 1219 { 1220 struct ureg_dst dst; 1221 1222 switch (param->file) 1223 { 1224 case D3DSPR_TEMP: 1225 assert(!param->rel); 1226 tx_temp_alloc(tx, param->idx); 1227 dst = tx->regs.r[param->idx]; 1228 break; 1229 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ 1230 case D3DSPR_ADDR: 1231 assert(!param->rel); 1232 if (tx->version.major < 2 && !IS_VS) { 1233 if (ureg_dst_is_undef(tx->regs.tS[param->idx])) 1234 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg); 1235 dst = tx->regs.tS[param->idx]; 1236 } else 1237 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */ 1238 tx_texcoord_alloc(tx, param->idx); 1239 dst = ureg_dst(tx->regs.vT[param->idx]); 1240 } else { 1241 tx_addr_alloc(tx, param->idx); 1242 dst = tx->regs.a0; 1243 } 1244 break; 1245 case D3DSPR_RASTOUT: 1246 assert(!param->rel); 1247 switch (param->idx) { 1248 case 0: 1249 if (ureg_dst_is_undef(tx->regs.oPos)) 1250 tx->regs.oPos = 1251 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); 1252 dst = tx->regs.oPos; 1253 break; 1254 case 1: 1255 if (ureg_dst_is_undef(tx->regs.oFog)) 1256 tx->regs.oFog = 1257 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0)); 1258 dst = tx->regs.oFog; 1259 break; 1260 case 2: 1261 if (ureg_dst_is_undef(tx->regs.oPts)) 1262 tx->regs.oPts = ureg_DECL_temporary(tx->ureg); 1263 dst = tx->regs.oPts; 1264 break; 1265 default: 1266 assert(0); 1267 break; 1268 } 1269 break; 1270 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */ 1271 case D3DSPR_OUTPUT: 1272 if (tx->version.major < 3) { 1273 assert(!param->rel); 1274 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx); 1275 } else { 1276 assert(!param->rel); /* TODO */ 1277 assert(param->idx < ARRAY_SIZE(tx->regs.o)); 1278 dst = tx->regs.o[param->idx]; 1279 } 1280 break; 1281 case D3DSPR_ATTROUT: /* VS */ 1282 case D3DSPR_COLOROUT: /* PS */ 1283 assert(param->idx >= 0 && param->idx < 4); 1284 assert(!param->rel); 1285 tx->info->rt_mask |= 1 << param->idx; 1286 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) { 1287 /* ps < 3: oCol[0] will have fog blending afterward */ 1288 if (!IS_VS && tx->version.major < 3 && param->idx == 0) { 1289 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg); 1290 } else { 1291 tx->regs.oCol[param->idx] = 1292 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); 1293 } 1294 } 1295 dst = tx->regs.oCol[param->idx]; 1296 if (IS_VS && tx->version.major < 3) 1297 dst = ureg_saturate(dst); 1298 break; 1299 case D3DSPR_DEPTHOUT: 1300 assert(!param->rel); 1301 if (ureg_dst_is_undef(tx->regs.oDepth)) 1302 tx->regs.oDepth = 1303 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0, 1304 TGSI_WRITEMASK_Z, 0, 1); 1305 dst = tx->regs.oDepth; /* XXX: must write .z component */ 1306 break; 1307 case D3DSPR_PREDICATE: 1308 assert(!param->rel); 1309 tx_pred_alloc(tx, param->idx); 1310 dst = tx->regs.p; 1311 break; 1312 case D3DSPR_TEMPFLOAT16: 1313 DBG("unhandled D3DSPR: %u\n", param->file); 1314 break; 1315 default: 1316 assert(!"invalid dst D3DSPR"); 1317 break; 1318 } 1319 if (param->rel) 1320 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel)); 1321 1322 if (param->mask != NINED3DSP_WRITEMASK_ALL) 1323 dst = ureg_writemask(dst, param->mask); 1324 if (param->mod & NINED3DSPDM_SATURATE) 1325 dst = ureg_saturate(dst); 1326 1327 return dst; 1328 } 1329 1330 static struct ureg_dst 1331 tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) 1332 { 1333 if (param->shift) { 1334 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask); 1335 return tx->regs.tdst; 1336 } 1337 return _tx_dst_param(tx, param); 1338 } 1339 1340 static void 1341 tx_apply_dst0_modifiers(struct shader_translator *tx) 1342 { 1343 struct ureg_dst rdst; 1344 float f; 1345 1346 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL) 1347 return; 1348 rdst = _tx_dst_param(tx, &tx->insn.dst[0]); 1349 1350 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */ 1351 1352 if (tx->insn.dst[0].shift < 0) 1353 f = 1.0f / (1 << -tx->insn.dst[0].shift); 1354 else 1355 f = 1 << tx->insn.dst[0].shift; 1356 1357 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f)); 1358 } 1359 1360 static struct ureg_src 1361 tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param) 1362 { 1363 struct ureg_src src; 1364 1365 assert(!param->shift); 1366 assert(!(param->mod & NINED3DSPDM_SATURATE)); 1367 1368 switch (param->file) { 1369 case D3DSPR_INPUT: 1370 if (IS_VS) { 1371 src = ureg_src_register(TGSI_FILE_INPUT, param->idx); 1372 } else { 1373 assert(!param->rel); 1374 assert(param->idx < ARRAY_SIZE(tx->regs.v)); 1375 src = tx->regs.v[param->idx]; 1376 } 1377 break; 1378 default: 1379 src = ureg_src(tx_dst_param(tx, param)); 1380 break; 1381 } 1382 if (param->rel) 1383 src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 1384 1385 if (!param->mask) 1386 WARN("mask is 0, using identity swizzle\n"); 1387 1388 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) { 1389 char s[4]; 1390 int n; 1391 int c; 1392 for (n = 0, c = 0; c < 4; ++c) 1393 if (param->mask & (1 << c)) 1394 s[n++] = c; 1395 assert(n); 1396 for (c = n; c < 4; ++c) 1397 s[c] = s[n - 1]; 1398 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]); 1399 } 1400 return src; 1401 } 1402 1403 static HRESULT 1404 NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n) 1405 { 1406 struct ureg_program *ureg = tx->ureg; 1407 struct ureg_dst dst; 1408 struct ureg_src src[2]; 1409 struct sm1_src_param *src_mat = &tx->insn.src[1]; 1410 unsigned i; 1411 1412 dst = tx_dst_param(tx, &tx->insn.dst[0]); 1413 src[0] = tx_src_param(tx, &tx->insn.src[0]); 1414 1415 for (i = 0; i < n; i++) 1416 { 1417 const unsigned m = (1 << i); 1418 1419 src[1] = tx_src_param(tx, src_mat); 1420 src_mat->idx++; 1421 1422 if (!(dst.WriteMask & m)) 1423 continue; 1424 1425 /* XXX: src == dst case ? */ 1426 1427 switch (k) { 1428 case 3: 1429 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]); 1430 break; 1431 case 4: 1432 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]); 1433 break; 1434 default: 1435 DBG("invalid operation: M%ux%u\n", m, n); 1436 break; 1437 } 1438 } 1439 1440 return D3D_OK; 1441 } 1442 1443 #define VNOTSUPPORTED 0, 0 1444 #define V(maj, min) (((maj) << 8) | (min)) 1445 1446 static inline const char * 1447 d3dsio_to_string( unsigned opcode ) 1448 { 1449 static const char *names[] = { 1450 "NOP", 1451 "MOV", 1452 "ADD", 1453 "SUB", 1454 "MAD", 1455 "MUL", 1456 "RCP", 1457 "RSQ", 1458 "DP3", 1459 "DP4", 1460 "MIN", 1461 "MAX", 1462 "SLT", 1463 "SGE", 1464 "EXP", 1465 "LOG", 1466 "LIT", 1467 "DST", 1468 "LRP", 1469 "FRC", 1470 "M4x4", 1471 "M4x3", 1472 "M3x4", 1473 "M3x3", 1474 "M3x2", 1475 "CALL", 1476 "CALLNZ", 1477 "LOOP", 1478 "RET", 1479 "ENDLOOP", 1480 "LABEL", 1481 "DCL", 1482 "POW", 1483 "CRS", 1484 "SGN", 1485 "ABS", 1486 "NRM", 1487 "SINCOS", 1488 "REP", 1489 "ENDREP", 1490 "IF", 1491 "IFC", 1492 "ELSE", 1493 "ENDIF", 1494 "BREAK", 1495 "BREAKC", 1496 "MOVA", 1497 "DEFB", 1498 "DEFI", 1499 NULL, 1500 NULL, 1501 NULL, 1502 NULL, 1503 NULL, 1504 NULL, 1505 NULL, 1506 NULL, 1507 NULL, 1508 NULL, 1509 NULL, 1510 NULL, 1511 NULL, 1512 NULL, 1513 NULL, 1514 "TEXCOORD", 1515 "TEXKILL", 1516 "TEX", 1517 "TEXBEM", 1518 "TEXBEML", 1519 "TEXREG2AR", 1520 "TEXREG2GB", 1521 "TEXM3x2PAD", 1522 "TEXM3x2TEX", 1523 "TEXM3x3PAD", 1524 "TEXM3x3TEX", 1525 NULL, 1526 "TEXM3x3SPEC", 1527 "TEXM3x3VSPEC", 1528 "EXPP", 1529 "LOGP", 1530 "CND", 1531 "DEF", 1532 "TEXREG2RGB", 1533 "TEXDP3TEX", 1534 "TEXM3x2DEPTH", 1535 "TEXDP3", 1536 "TEXM3x3", 1537 "TEXDEPTH", 1538 "CMP", 1539 "BEM", 1540 "DP2ADD", 1541 "DSX", 1542 "DSY", 1543 "TEXLDD", 1544 "SETP", 1545 "TEXLDL", 1546 "BREAKP" 1547 }; 1548 1549 if (opcode < ARRAY_SIZE(names)) return names[opcode]; 1550 1551 switch (opcode) { 1552 case D3DSIO_PHASE: return "PHASE"; 1553 case D3DSIO_COMMENT: return "COMMENT"; 1554 case D3DSIO_END: return "END"; 1555 default: 1556 return NULL; 1557 } 1558 } 1559 1560 #define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL } 1561 #define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \ 1562 (inst).vert_version.max | \ 1563 (inst).frag_version.min | \ 1564 (inst).frag_version.max) 1565 1566 #define SPECIAL(name) \ 1567 NineTranslateInstruction_##name 1568 1569 #define DECL_SPECIAL(name) \ 1570 static HRESULT \ 1571 NineTranslateInstruction_##name( struct shader_translator *tx ) 1572 1573 static HRESULT 1574 NineTranslateInstruction_Generic(struct shader_translator *); 1575 1576 DECL_SPECIAL(NOP) 1577 { 1578 /* Nothing to do. NOP was used to avoid hangs 1579 * with very old d3d drivers. */ 1580 return D3D_OK; 1581 } 1582 1583 DECL_SPECIAL(SUB) 1584 { 1585 struct ureg_program *ureg = tx->ureg; 1586 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1587 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 1588 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 1589 1590 ureg_ADD(ureg, dst, src0, ureg_negate(src1)); 1591 return D3D_OK; 1592 } 1593 1594 DECL_SPECIAL(ABS) 1595 { 1596 struct ureg_program *ureg = tx->ureg; 1597 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1598 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 1599 1600 ureg_MOV(ureg, dst, ureg_abs(src)); 1601 return D3D_OK; 1602 } 1603 1604 DECL_SPECIAL(M4x4) 1605 { 1606 return NineTranslateInstruction_Mkxn(tx, 4, 4); 1607 } 1608 1609 DECL_SPECIAL(M4x3) 1610 { 1611 return NineTranslateInstruction_Mkxn(tx, 4, 3); 1612 } 1613 1614 DECL_SPECIAL(M3x4) 1615 { 1616 return NineTranslateInstruction_Mkxn(tx, 3, 4); 1617 } 1618 1619 DECL_SPECIAL(M3x3) 1620 { 1621 return NineTranslateInstruction_Mkxn(tx, 3, 3); 1622 } 1623 1624 DECL_SPECIAL(M3x2) 1625 { 1626 return NineTranslateInstruction_Mkxn(tx, 3, 2); 1627 } 1628 1629 DECL_SPECIAL(CMP) 1630 { 1631 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]), 1632 tx_src_param(tx, &tx->insn.src[0]), 1633 tx_src_param(tx, &tx->insn.src[2]), 1634 tx_src_param(tx, &tx->insn.src[1])); 1635 return D3D_OK; 1636 } 1637 1638 DECL_SPECIAL(CND) 1639 { 1640 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1641 struct ureg_dst cgt; 1642 struct ureg_src cnd; 1643 1644 /* the coissue flag was a tip for compilers to advise to 1645 * execute two operations at the same time, in cases 1646 * the two executions had same dst with different channels. 1647 * It has no effect on current hw. However it seems CND 1648 * is affected. The handling of this very specific case 1649 * handled below mimick wine behaviour */ 1650 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) { 1651 ureg_MOV(tx->ureg, 1652 dst, tx_src_param(tx, &tx->insn.src[1])); 1653 return D3D_OK; 1654 } 1655 1656 cnd = tx_src_param(tx, &tx->insn.src[0]); 1657 cgt = tx_scratch(tx); 1658 1659 if (tx->version.major == 1 && tx->version.minor < 4) 1660 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W); 1661 1662 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f)); 1663 1664 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)), 1665 tx_src_param(tx, &tx->insn.src[1]), 1666 tx_src_param(tx, &tx->insn.src[2])); 1667 return D3D_OK; 1668 } 1669 1670 DECL_SPECIAL(CALL) 1671 { 1672 assert(tx->insn.src[0].idx < tx->num_inst_labels); 1673 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]); 1674 return D3D_OK; 1675 } 1676 1677 DECL_SPECIAL(CALLNZ) 1678 { 1679 struct ureg_program *ureg = tx->ureg; 1680 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); 1681 1682 if (!tx->native_integers) 1683 ureg_IF(ureg, src, tx_cond(tx)); 1684 else 1685 ureg_UIF(ureg, src, tx_cond(tx)); 1686 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]); 1687 tx_endcond(tx); 1688 ureg_ENDIF(ureg); 1689 return D3D_OK; 1690 } 1691 1692 DECL_SPECIAL(LOOP) 1693 { 1694 struct ureg_program *ureg = tx->ureg; 1695 unsigned *label; 1696 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); 1697 struct ureg_dst ctr; 1698 struct ureg_dst tmp; 1699 struct ureg_src ctrx; 1700 1701 label = tx_bgnloop(tx); 1702 ctr = tx_get_loopctr(tx, TRUE); 1703 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); 1704 1705 /* src: num_iterations - start_value of al - step for al - 0 */ 1706 ureg_MOV(ureg, ctr, src); 1707 ureg_BGNLOOP(tx->ureg, label); 1708 tmp = tx_scratch_scalar(tx); 1709 /* Initially ctr.x contains the number of iterations. 1710 * ctr.y will contain the updated value of al. 1711 * We decrease ctr.x at the end of every iteration, 1712 * and stop when it reaches 0. */ 1713 1714 if (!tx->native_integers) { 1715 /* case src and ctr contain floats */ 1716 /* to avoid precision issue, we stop when ctr <= 0.5 */ 1717 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); 1718 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1719 } else { 1720 /* case src and ctr contain integers */ 1721 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); 1722 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1723 } 1724 ureg_BRK(ureg); 1725 tx_endcond(tx); 1726 ureg_ENDIF(ureg); 1727 return D3D_OK; 1728 } 1729 1730 DECL_SPECIAL(RET) 1731 { 1732 ureg_RET(tx->ureg); 1733 return D3D_OK; 1734 } 1735 1736 DECL_SPECIAL(ENDLOOP) 1737 { 1738 struct ureg_program *ureg = tx->ureg; 1739 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE); 1740 struct ureg_dst dst_ctrx, dst_al; 1741 struct ureg_src src_ctr, al_counter; 1742 1743 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); 1744 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1); 1745 src_ctr = ureg_src(ctr); 1746 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z); 1747 1748 /* ctr.x -= 1 1749 * ctr.y (aL) += step */ 1750 if (!tx->native_integers) { 1751 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); 1752 ureg_ADD(ureg, dst_al, src_ctr, al_counter); 1753 } else { 1754 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); 1755 ureg_UADD(ureg, dst_al, src_ctr, al_counter); 1756 } 1757 ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); 1758 return D3D_OK; 1759 } 1760 1761 DECL_SPECIAL(LABEL) 1762 { 1763 unsigned k = tx->num_inst_labels; 1764 unsigned n = tx->insn.src[0].idx; 1765 assert(n < 2048); 1766 if (n >= k) 1767 tx->inst_labels = REALLOC(tx->inst_labels, 1768 k * sizeof(tx->inst_labels[0]), 1769 n * sizeof(tx->inst_labels[0])); 1770 1771 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg); 1772 return D3D_OK; 1773 } 1774 1775 DECL_SPECIAL(SINCOS) 1776 { 1777 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1778 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 1779 1780 assert(!(dst.WriteMask & 0xc)); 1781 1782 dst.WriteMask &= TGSI_WRITEMASK_XY; /* z undefined, w untouched */ 1783 ureg_SCS(tx->ureg, dst, src); 1784 return D3D_OK; 1785 } 1786 1787 DECL_SPECIAL(SGN) 1788 { 1789 ureg_SSG(tx->ureg, 1790 tx_dst_param(tx, &tx->insn.dst[0]), 1791 tx_src_param(tx, &tx->insn.src[0])); 1792 return D3D_OK; 1793 } 1794 1795 DECL_SPECIAL(REP) 1796 { 1797 struct ureg_program *ureg = tx->ureg; 1798 unsigned *label; 1799 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]); 1800 struct ureg_dst ctr; 1801 struct ureg_dst tmp; 1802 struct ureg_src ctrx; 1803 1804 label = tx_bgnloop(tx); 1805 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0); 1806 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); 1807 1808 /* NOTE: rep must be constant, so we don't have to save the count */ 1809 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE); 1810 1811 /* rep: num_iterations - 0 - 0 - 0 */ 1812 ureg_MOV(ureg, ctr, rep); 1813 ureg_BGNLOOP(ureg, label); 1814 tmp = tx_scratch_scalar(tx); 1815 /* Initially ctr.x contains the number of iterations. 1816 * We decrease ctr.x at the end of every iteration, 1817 * and stop when it reaches 0. */ 1818 1819 if (!tx->native_integers) { 1820 /* case src and ctr contain floats */ 1821 /* to avoid precision issue, we stop when ctr <= 0.5 */ 1822 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); 1823 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1824 } else { 1825 /* case src and ctr contain integers */ 1826 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); 1827 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1828 } 1829 ureg_BRK(ureg); 1830 tx_endcond(tx); 1831 ureg_ENDIF(ureg); 1832 1833 return D3D_OK; 1834 } 1835 1836 DECL_SPECIAL(ENDREP) 1837 { 1838 struct ureg_program *ureg = tx->ureg; 1839 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE); 1840 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); 1841 struct ureg_src src_ctr = ureg_src(ctr); 1842 1843 /* ctr.x -= 1 */ 1844 if (!tx->native_integers) 1845 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); 1846 else 1847 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); 1848 1849 ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); 1850 return D3D_OK; 1851 } 1852 1853 DECL_SPECIAL(ENDIF) 1854 { 1855 tx_endcond(tx); 1856 ureg_ENDIF(tx->ureg); 1857 return D3D_OK; 1858 } 1859 1860 DECL_SPECIAL(IF) 1861 { 1862 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 1863 1864 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL) 1865 ureg_UIF(tx->ureg, src, tx_cond(tx)); 1866 else 1867 ureg_IF(tx->ureg, src, tx_cond(tx)); 1868 1869 return D3D_OK; 1870 } 1871 1872 static inline unsigned 1873 sm1_insn_flags_to_tgsi_setop(BYTE flags) 1874 { 1875 switch (flags) { 1876 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT; 1877 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ; 1878 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE; 1879 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT; 1880 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE; 1881 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE; 1882 default: 1883 assert(!"invalid comparison flags"); 1884 return TGSI_OPCODE_SGT; 1885 } 1886 } 1887 1888 DECL_SPECIAL(IFC) 1889 { 1890 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 1891 struct ureg_src src[2]; 1892 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 1893 src[0] = tx_src_param(tx, &tx->insn.src[0]); 1894 src[1] = tx_src_param(tx, &tx->insn.src[1]); 1895 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2); 1896 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); 1897 return D3D_OK; 1898 } 1899 1900 DECL_SPECIAL(ELSE) 1901 { 1902 ureg_ELSE(tx->ureg, tx_elsecond(tx)); 1903 return D3D_OK; 1904 } 1905 1906 DECL_SPECIAL(BREAKC) 1907 { 1908 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 1909 struct ureg_src src[2]; 1910 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 1911 src[0] = tx_src_param(tx, &tx->insn.src[0]); 1912 src[1] = tx_src_param(tx, &tx->insn.src[1]); 1913 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2); 1914 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); 1915 ureg_BRK(tx->ureg); 1916 tx_endcond(tx); 1917 ureg_ENDIF(tx->ureg); 1918 return D3D_OK; 1919 } 1920 1921 static const char *sm1_declusage_names[] = 1922 { 1923 [D3DDECLUSAGE_POSITION] = "POSITION", 1924 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT", 1925 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES", 1926 [D3DDECLUSAGE_NORMAL] = "NORMAL", 1927 [D3DDECLUSAGE_PSIZE] = "PSIZE", 1928 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD", 1929 [D3DDECLUSAGE_TANGENT] = "TANGENT", 1930 [D3DDECLUSAGE_BINORMAL] = "BINORMAL", 1931 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR", 1932 [D3DDECLUSAGE_POSITIONT] = "POSITIONT", 1933 [D3DDECLUSAGE_COLOR] = "COLOR", 1934 [D3DDECLUSAGE_FOG] = "FOG", 1935 [D3DDECLUSAGE_DEPTH] = "DEPTH", 1936 [D3DDECLUSAGE_SAMPLE] = "SAMPLE" 1937 }; 1938 1939 static inline unsigned 1940 sm1_to_nine_declusage(struct sm1_semantic *dcl) 1941 { 1942 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx); 1943 } 1944 1945 static void 1946 sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem, 1947 boolean tc, 1948 struct sm1_semantic *dcl) 1949 { 1950 BYTE index = dcl->usage_idx; 1951 1952 /* For everything that is not matching to a TGSI_SEMANTIC_****, 1953 * we match to a TGSI_SEMANTIC_GENERIC with index. 1954 * 1955 * The index can be anything UINT16 and usage_idx is BYTE, 1956 * so we can fit everything. It doesn't matter if indices 1957 * are close together or low. 1958 * 1959 * 1960 * POSITION >= 1: 10 * index + 6 1961 * COLOR >= 2: 10 * (index-1) + 7 1962 * TEXCOORD[0..15]: index 1963 * BLENDWEIGHT: 10 * index + 18 1964 * BLENDINDICES: 10 * index + 19 1965 * NORMAL: 10 * index + 20 1966 * TANGENT: 10 * index + 21 1967 * BINORMAL: 10 * index + 22 1968 * TESSFACTOR: 10 * index + 23 1969 */ 1970 1971 switch (dcl->usage) { 1972 case D3DDECLUSAGE_POSITION: 1973 case D3DDECLUSAGE_POSITIONT: 1974 case D3DDECLUSAGE_DEPTH: 1975 if (index == 0) { 1976 sem->Name = TGSI_SEMANTIC_POSITION; 1977 sem->Index = 0; 1978 } else { 1979 sem->Name = TGSI_SEMANTIC_GENERIC; 1980 sem->Index = 10 * index + 6; 1981 } 1982 break; 1983 case D3DDECLUSAGE_COLOR: 1984 if (index < 2) { 1985 sem->Name = TGSI_SEMANTIC_COLOR; 1986 sem->Index = index; 1987 } else { 1988 sem->Name = TGSI_SEMANTIC_GENERIC; 1989 sem->Index = 10 * (index-1) + 7; 1990 } 1991 break; 1992 case D3DDECLUSAGE_FOG: 1993 assert(index == 0); 1994 sem->Name = TGSI_SEMANTIC_FOG; 1995 sem->Index = 0; 1996 break; 1997 case D3DDECLUSAGE_PSIZE: 1998 assert(index == 0); 1999 sem->Name = TGSI_SEMANTIC_PSIZE; 2000 sem->Index = 0; 2001 break; 2002 case D3DDECLUSAGE_TEXCOORD: 2003 assert(index < 16); 2004 if (index < 8 && tc) 2005 sem->Name = TGSI_SEMANTIC_TEXCOORD; 2006 else 2007 sem->Name = TGSI_SEMANTIC_GENERIC; 2008 sem->Index = index; 2009 break; 2010 case D3DDECLUSAGE_BLENDWEIGHT: 2011 sem->Name = TGSI_SEMANTIC_GENERIC; 2012 sem->Index = 10 * index + 18; 2013 break; 2014 case D3DDECLUSAGE_BLENDINDICES: 2015 sem->Name = TGSI_SEMANTIC_GENERIC; 2016 sem->Index = 10 * index + 19; 2017 break; 2018 case D3DDECLUSAGE_NORMAL: 2019 sem->Name = TGSI_SEMANTIC_GENERIC; 2020 sem->Index = 10 * index + 20; 2021 break; 2022 case D3DDECLUSAGE_TANGENT: 2023 sem->Name = TGSI_SEMANTIC_GENERIC; 2024 sem->Index = 10 * index + 21; 2025 break; 2026 case D3DDECLUSAGE_BINORMAL: 2027 sem->Name = TGSI_SEMANTIC_GENERIC; 2028 sem->Index = 10 * index + 22; 2029 break; 2030 case D3DDECLUSAGE_TESSFACTOR: 2031 sem->Name = TGSI_SEMANTIC_GENERIC; 2032 sem->Index = 10 * index + 23; 2033 break; 2034 case D3DDECLUSAGE_SAMPLE: 2035 sem->Name = TGSI_SEMANTIC_COUNT; 2036 sem->Index = 0; 2037 break; 2038 default: 2039 unreachable("Invalid DECLUSAGE."); 2040 break; 2041 } 2042 } 2043 2044 #define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT) 2045 #define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT) 2046 #define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT) 2047 #define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT) 2048 static inline unsigned 2049 d3dstt_to_tgsi_tex(BYTE sampler_type) 2050 { 2051 switch (sampler_type) { 2052 case NINED3DSTT_1D: return TGSI_TEXTURE_1D; 2053 case NINED3DSTT_2D: return TGSI_TEXTURE_2D; 2054 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D; 2055 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE; 2056 default: 2057 assert(0); 2058 return TGSI_TEXTURE_UNKNOWN; 2059 } 2060 } 2061 static inline unsigned 2062 d3dstt_to_tgsi_tex_shadow(BYTE sampler_type) 2063 { 2064 switch (sampler_type) { 2065 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D; 2066 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D; 2067 case NINED3DSTT_VOLUME: 2068 case NINED3DSTT_CUBE: 2069 default: 2070 assert(0); 2071 return TGSI_TEXTURE_UNKNOWN; 2072 } 2073 } 2074 static inline unsigned 2075 ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage) 2076 { 2077 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) { 2078 case 1: return TGSI_TEXTURE_1D; 2079 case 0: return TGSI_TEXTURE_2D; 2080 case 3: return TGSI_TEXTURE_3D; 2081 default: 2082 return TGSI_TEXTURE_CUBE; 2083 } 2084 } 2085 2086 static const char * 2087 sm1_sampler_type_name(BYTE sampler_type) 2088 { 2089 switch (sampler_type) { 2090 case NINED3DSTT_1D: return "1D"; 2091 case NINED3DSTT_2D: return "2D"; 2092 case NINED3DSTT_VOLUME: return "VOLUME"; 2093 case NINED3DSTT_CUBE: return "CUBE"; 2094 default: 2095 return "(D3DSTT_?)"; 2096 } 2097 } 2098 2099 static inline unsigned 2100 nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem) 2101 { 2102 switch (sem->Name) { 2103 case TGSI_SEMANTIC_POSITION: 2104 case TGSI_SEMANTIC_NORMAL: 2105 return TGSI_INTERPOLATE_LINEAR; 2106 case TGSI_SEMANTIC_BCOLOR: 2107 case TGSI_SEMANTIC_COLOR: 2108 return TGSI_INTERPOLATE_COLOR; 2109 case TGSI_SEMANTIC_FOG: 2110 case TGSI_SEMANTIC_GENERIC: 2111 case TGSI_SEMANTIC_TEXCOORD: 2112 case TGSI_SEMANTIC_CLIPDIST: 2113 case TGSI_SEMANTIC_CLIPVERTEX: 2114 return TGSI_INTERPOLATE_PERSPECTIVE; 2115 case TGSI_SEMANTIC_EDGEFLAG: 2116 case TGSI_SEMANTIC_FACE: 2117 case TGSI_SEMANTIC_INSTANCEID: 2118 case TGSI_SEMANTIC_PCOORD: 2119 case TGSI_SEMANTIC_PRIMID: 2120 case TGSI_SEMANTIC_PSIZE: 2121 case TGSI_SEMANTIC_VERTEXID: 2122 return TGSI_INTERPOLATE_CONSTANT; 2123 default: 2124 assert(0); 2125 return TGSI_INTERPOLATE_CONSTANT; 2126 } 2127 } 2128 2129 DECL_SPECIAL(DCL) 2130 { 2131 struct ureg_program *ureg = tx->ureg; 2132 boolean is_input; 2133 boolean is_sampler; 2134 struct tgsi_declaration_semantic tgsi; 2135 struct sm1_semantic sem; 2136 sm1_read_semantic(tx, &sem); 2137 2138 is_input = sem.reg.file == D3DSPR_INPUT; 2139 is_sampler = 2140 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER; 2141 2142 DUMP("DCL "); 2143 sm1_dump_dst_param(&sem.reg); 2144 if (is_sampler) 2145 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type)); 2146 else 2147 if (tx->version.major >= 3) 2148 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx); 2149 else 2150 if (sem.usage | sem.usage_idx) 2151 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx); 2152 else 2153 DUMP("\n"); 2154 2155 if (is_sampler) { 2156 const unsigned m = 1 << sem.reg.idx; 2157 ureg_DECL_sampler(ureg, sem.reg.idx); 2158 tx->info->sampler_mask |= m; 2159 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ? 2160 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) : 2161 d3dstt_to_tgsi_tex(sem.sampler_type); 2162 return D3D_OK; 2163 } 2164 2165 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem); 2166 if (IS_VS) { 2167 if (is_input) { 2168 /* linkage outside of shader with vertex declaration */ 2169 ureg_DECL_vs_input(ureg, sem.reg.idx); 2170 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map)); 2171 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem); 2172 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1); 2173 /* NOTE: preserving order in case of indirect access */ 2174 } else 2175 if (tx->version.major >= 3) { 2176 /* SM2 output semantic determined by file */ 2177 assert(sem.reg.mask != 0); 2178 if (sem.usage == D3DDECLUSAGE_POSITIONT) 2179 tx->info->position_t = TRUE; 2180 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o)); 2181 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing"); 2182 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked( 2183 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1); 2184 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx); 2185 if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) { 2186 tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; 2187 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); 2188 tx->regs.oPos = tx->regs.o[sem.reg.idx]; 2189 } 2190 2191 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) { 2192 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); 2193 tx->regs.oPts = tx->regs.o[sem.reg.idx]; 2194 } 2195 } 2196 } else { 2197 if (is_input && tx->version.major >= 3) { 2198 unsigned interp_location = 0; 2199 /* SM3 only, SM2 input semantic determined by file */ 2200 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v)); 2201 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing"); 2202 /* PositionT and tessfactor forbidden */ 2203 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR) 2204 return D3DERR_INVALIDCALL; 2205 2206 if (tgsi.Name == TGSI_SEMANTIC_POSITION) { 2207 /* Position0 is forbidden (likely because vPos already does that) */ 2208 if (sem.usage == D3DDECLUSAGE_POSITION) 2209 return D3DERR_INVALIDCALL; 2210 /* Following code is for depth */ 2211 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx); 2212 return D3D_OK; 2213 } 2214 2215 if (sem.reg.mod & NINED3DSPDM_CENTROID || 2216 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid)) 2217 interp_location = TGSI_INTERPOLATE_LOC_CENTROID; 2218 2219 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_cyl_centroid( 2220 ureg, tgsi.Name, tgsi.Index, 2221 nine_tgsi_to_interp_mode(&tgsi), 2222 0, /* cylwrap */ 2223 interp_location, 0, 1); 2224 } else 2225 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */ 2226 /* FragColor or FragDepth */ 2227 assert(sem.reg.mask != 0); 2228 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 2229 0, 1); 2230 } 2231 } 2232 return D3D_OK; 2233 } 2234 2235 DECL_SPECIAL(DEF) 2236 { 2237 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f); 2238 return D3D_OK; 2239 } 2240 2241 DECL_SPECIAL(DEFB) 2242 { 2243 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b); 2244 return D3D_OK; 2245 } 2246 2247 DECL_SPECIAL(DEFI) 2248 { 2249 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i); 2250 return D3D_OK; 2251 } 2252 2253 DECL_SPECIAL(POW) 2254 { 2255 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2256 struct ureg_src src[2] = { 2257 tx_src_param(tx, &tx->insn.src[0]), 2258 tx_src_param(tx, &tx->insn.src[1]) 2259 }; 2260 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]); 2261 return D3D_OK; 2262 } 2263 2264 DECL_SPECIAL(RSQ) 2265 { 2266 struct ureg_program *ureg = tx->ureg; 2267 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2268 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2269 struct ureg_dst tmp = tx_scratch(tx); 2270 ureg_RSQ(ureg, tmp, ureg_abs(src)); 2271 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp)); 2272 return D3D_OK; 2273 } 2274 2275 DECL_SPECIAL(LOG) 2276 { 2277 struct ureg_program *ureg = tx->ureg; 2278 struct ureg_dst tmp = tx_scratch_scalar(tx); 2279 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2280 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2281 ureg_LG2(ureg, tmp, ureg_abs(src)); 2282 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp)); 2283 return D3D_OK; 2284 } 2285 2286 DECL_SPECIAL(LIT) 2287 { 2288 struct ureg_program *ureg = tx->ureg; 2289 struct ureg_dst tmp = tx_scratch(tx); 2290 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2291 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2292 ureg_LIT(ureg, tmp, src); 2293 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9 2294 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign 2295 * it 0^0 if src.w=0, which value is driver dependent. */ 2296 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), 2297 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)), 2298 ureg_src(tmp), ureg_imm1f(ureg, 0.0f)); 2299 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp)); 2300 return D3D_OK; 2301 } 2302 2303 DECL_SPECIAL(NRM) 2304 { 2305 struct ureg_program *ureg = tx->ureg; 2306 struct ureg_dst tmp = tx_scratch_scalar(tx); 2307 struct ureg_src nrm = tx_src_scalar(tmp); 2308 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2309 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2310 ureg_DP3(ureg, tmp, src, src); 2311 ureg_RSQ(ureg, tmp, nrm); 2312 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm); 2313 ureg_MUL(ureg, dst, src, nrm); 2314 return D3D_OK; 2315 } 2316 2317 DECL_SPECIAL(DP2ADD) 2318 { 2319 struct ureg_dst tmp = tx_scratch_scalar(tx); 2320 struct ureg_src dp2 = tx_src_scalar(tmp); 2321 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2322 struct ureg_src src[3]; 2323 int i; 2324 for (i = 0; i < 3; ++i) 2325 src[i] = tx_src_param(tx, &tx->insn.src[i]); 2326 assert_replicate_swizzle(&src[2]); 2327 2328 ureg_DP2(tx->ureg, tmp, src[0], src[1]); 2329 ureg_ADD(tx->ureg, dst, src[2], dp2); 2330 2331 return D3D_OK; 2332 } 2333 2334 DECL_SPECIAL(TEXCOORD) 2335 { 2336 struct ureg_program *ureg = tx->ureg; 2337 const unsigned s = tx->insn.dst[0].idx; 2338 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2339 2340 tx_texcoord_alloc(tx, s); 2341 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]); 2342 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f)); 2343 2344 return D3D_OK; 2345 } 2346 2347 DECL_SPECIAL(TEXCOORD_ps14) 2348 { 2349 struct ureg_program *ureg = tx->ureg; 2350 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2351 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2352 2353 assert(tx->insn.src[0].file == D3DSPR_TEXTURE); 2354 2355 ureg_MOV(ureg, dst, src); 2356 2357 return D3D_OK; 2358 } 2359 2360 DECL_SPECIAL(TEXKILL) 2361 { 2362 struct ureg_src reg; 2363 2364 if (tx->version.major > 1 || tx->version.minor > 3) { 2365 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]); 2366 } else { 2367 tx_texcoord_alloc(tx, tx->insn.dst[0].idx); 2368 reg = tx->regs.vT[tx->insn.dst[0].idx]; 2369 } 2370 if (tx->version.major < 2) 2371 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z)); 2372 ureg_KILL_IF(tx->ureg, reg); 2373 2374 return D3D_OK; 2375 } 2376 2377 DECL_SPECIAL(TEXBEM) 2378 { 2379 struct ureg_program *ureg = tx->ureg; 2380 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2381 struct ureg_dst tmp, tmp2, texcoord; 2382 struct ureg_src sample, m00, m01, m10, m11; 2383 struct ureg_src bumpenvlscale, bumpenvloffset; 2384 const int m = tx->insn.dst[0].idx; 2385 const int n = tx->insn.src[0].idx; 2386 2387 assert(tx->version.major == 1); 2388 2389 sample = ureg_DECL_sampler(ureg, m); 2390 tx->info->sampler_mask |= 1 << m; 2391 2392 tx_texcoord_alloc(tx, m); 2393 2394 tmp = tx_scratch(tx); 2395 tmp2 = tx_scratch(tx); 2396 texcoord = tx_scratch(tx); 2397 /* 2398 * Bump-env-matrix: 2399 * 00 is X 2400 * 01 is Y 2401 * 10 is Z 2402 * 11 is W 2403 */ 2404 nine_info_mark_const_f_used(tx->info, 8 + 8 + m/2); 2405 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); 2406 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y); 2407 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); 2408 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); 2409 2410 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */ 2411 if (m % 2 == 0) { 2412 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, X); 2413 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Y); 2414 } else { 2415 bumpenvlscale = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, Z); 2416 bumpenvloffset = NINE_CONSTANT_SRC_SWIZZLE(8 + 8 + m / 2, W); 2417 } 2418 2419 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m); 2420 2421 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */ 2422 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, 2423 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord)); 2424 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */ 2425 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, 2426 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), 2427 NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); 2428 2429 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */ 2430 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, 2431 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), X), ureg_src(texcoord)); 2432 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/ 2433 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, 2434 NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Y), 2435 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); 2436 2437 /* Now the texture coordinates are in tmp.xy */ 2438 2439 if (tx->insn.opcode == D3DSIO_TEXBEM) { 2440 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 2441 } else if (tx->insn.opcode == D3DSIO_TEXBEML) { 2442 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */ 2443 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 2444 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(ureg_src(tx->regs.tS[n]), Z), 2445 bumpenvlscale, bumpenvloffset); 2446 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2)); 2447 } 2448 2449 tx->info->bumpenvmat_needed = 1; 2450 2451 return D3D_OK; 2452 } 2453 2454 DECL_SPECIAL(TEXREG2AR) 2455 { 2456 struct ureg_program *ureg = tx->ureg; 2457 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2458 struct ureg_src sample; 2459 const int m = tx->insn.dst[0].idx; 2460 const int n = tx->insn.src[0].idx; 2461 assert(m >= 0 && m > n); 2462 2463 sample = ureg_DECL_sampler(ureg, m); 2464 tx->info->sampler_mask |= 1 << m; 2465 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(W,X,X,X)), sample); 2466 2467 return D3D_OK; 2468 } 2469 2470 DECL_SPECIAL(TEXREG2GB) 2471 { 2472 struct ureg_program *ureg = tx->ureg; 2473 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2474 struct ureg_src sample; 2475 const int m = tx->insn.dst[0].idx; 2476 const int n = tx->insn.src[0].idx; 2477 assert(m >= 0 && m > n); 2478 2479 sample = ureg_DECL_sampler(ureg, m); 2480 tx->info->sampler_mask |= 1 << m; 2481 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(ureg_src(tx->regs.tS[n]), NINE_SWIZZLE4(Y,Z,Z,Z)), sample); 2482 2483 return D3D_OK; 2484 } 2485 2486 DECL_SPECIAL(TEXM3x2PAD) 2487 { 2488 return D3D_OK; /* this is just padding */ 2489 } 2490 2491 DECL_SPECIAL(TEXM3x2TEX) 2492 { 2493 struct ureg_program *ureg = tx->ureg; 2494 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2495 struct ureg_src sample; 2496 const int m = tx->insn.dst[0].idx - 1; 2497 const int n = tx->insn.src[0].idx; 2498 assert(m >= 0 && m > n); 2499 2500 tx_texcoord_alloc(tx, m); 2501 tx_texcoord_alloc(tx, m+1); 2502 2503 /* performs the matrix multiplication */ 2504 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); 2505 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n])); 2506 2507 sample = ureg_DECL_sampler(ureg, m + 1); 2508 tx->info->sampler_mask |= 1 << (m + 1); 2509 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample); 2510 2511 return D3D_OK; 2512 } 2513 2514 DECL_SPECIAL(TEXM3x3PAD) 2515 { 2516 return D3D_OK; /* this is just padding */ 2517 } 2518 2519 DECL_SPECIAL(TEXM3x3SPEC) 2520 { 2521 struct ureg_program *ureg = tx->ureg; 2522 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2523 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]); 2524 struct ureg_src sample; 2525 struct ureg_dst tmp; 2526 const int m = tx->insn.dst[0].idx - 2; 2527 const int n = tx->insn.src[0].idx; 2528 assert(m >= 0 && m > n); 2529 2530 tx_texcoord_alloc(tx, m); 2531 tx_texcoord_alloc(tx, m+1); 2532 tx_texcoord_alloc(tx, m+2); 2533 2534 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); 2535 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n])); 2536 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n])); 2537 2538 sample = ureg_DECL_sampler(ureg, m + 2); 2539 tx->info->sampler_mask |= 1 << (m + 2); 2540 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); 2541 2542 /* At this step, dst = N = (u', w', z'). 2543 * We want dst to be the texture sampled at (u'', w'', z''), with 2544 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ 2545 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); 2546 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 2547 /* at this step tmp.x = 1/N.N */ 2548 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E); 2549 /* at this step tmp.y = N.E */ 2550 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 2551 /* at this step tmp.x = N.E/N.N */ 2552 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); 2553 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); 2554 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ 2555 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E)); 2556 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); 2557 2558 return D3D_OK; 2559 } 2560 2561 DECL_SPECIAL(TEXREG2RGB) 2562 { 2563 struct ureg_program *ureg = tx->ureg; 2564 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2565 struct ureg_src sample; 2566 const int m = tx->insn.dst[0].idx; 2567 const int n = tx->insn.src[0].idx; 2568 assert(m >= 0 && m > n); 2569 2570 sample = ureg_DECL_sampler(ureg, m); 2571 tx->info->sampler_mask |= 1 << m; 2572 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tx->regs.tS[n]), sample); 2573 2574 return D3D_OK; 2575 } 2576 2577 DECL_SPECIAL(TEXDP3TEX) 2578 { 2579 struct ureg_program *ureg = tx->ureg; 2580 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2581 struct ureg_dst tmp; 2582 struct ureg_src sample; 2583 const int m = tx->insn.dst[0].idx; 2584 const int n = tx->insn.src[0].idx; 2585 assert(m >= 0 && m > n); 2586 2587 tx_texcoord_alloc(tx, m); 2588 2589 tmp = tx_scratch(tx); 2590 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); 2591 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f)); 2592 2593 sample = ureg_DECL_sampler(ureg, m); 2594 tx->info->sampler_mask |= 1 << m; 2595 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 2596 2597 return D3D_OK; 2598 } 2599 2600 DECL_SPECIAL(TEXM3x2DEPTH) 2601 { 2602 struct ureg_program *ureg = tx->ureg; 2603 struct ureg_dst tmp; 2604 const int m = tx->insn.dst[0].idx - 1; 2605 const int n = tx->insn.src[0].idx; 2606 assert(m >= 0 && m > n); 2607 2608 tx_texcoord_alloc(tx, m); 2609 tx_texcoord_alloc(tx, m+1); 2610 2611 tmp = tx_scratch(tx); 2612 2613 /* performs the matrix multiplication */ 2614 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); 2615 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n])); 2616 2617 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 2618 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */ 2619 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z)); 2620 /* res = 'w' == 0 ? 1.0 : z/w */ 2621 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))), 2622 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f)); 2623 /* replace the depth for depth testing with the result */ 2624 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, 2625 TGSI_WRITEMASK_Z, 0, 1); 2626 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 2627 /* note that we write nothing to the destination, since it's disallowed to use it afterward */ 2628 return D3D_OK; 2629 } 2630 2631 DECL_SPECIAL(TEXDP3) 2632 { 2633 struct ureg_program *ureg = tx->ureg; 2634 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2635 const int m = tx->insn.dst[0].idx; 2636 const int n = tx->insn.src[0].idx; 2637 assert(m >= 0 && m > n); 2638 2639 tx_texcoord_alloc(tx, m); 2640 2641 ureg_DP3(ureg, dst, tx->regs.vT[m], ureg_src(tx->regs.tS[n])); 2642 2643 return D3D_OK; 2644 } 2645 2646 DECL_SPECIAL(TEXM3x3) 2647 { 2648 struct ureg_program *ureg = tx->ureg; 2649 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2650 struct ureg_src sample; 2651 struct ureg_dst E, tmp; 2652 const int m = tx->insn.dst[0].idx - 2; 2653 const int n = tx->insn.src[0].idx; 2654 assert(m >= 0 && m > n); 2655 2656 tx_texcoord_alloc(tx, m); 2657 tx_texcoord_alloc(tx, m+1); 2658 tx_texcoord_alloc(tx, m+2); 2659 2660 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], ureg_src(tx->regs.tS[n])); 2661 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], ureg_src(tx->regs.tS[n])); 2662 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], ureg_src(tx->regs.tS[n])); 2663 2664 switch (tx->insn.opcode) { 2665 case D3DSIO_TEXM3x3: 2666 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 2667 break; 2668 case D3DSIO_TEXM3x3TEX: 2669 sample = ureg_DECL_sampler(ureg, m + 2); 2670 tx->info->sampler_mask |= 1 << (m + 2); 2671 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample); 2672 break; 2673 case D3DSIO_TEXM3x3VSPEC: 2674 sample = ureg_DECL_sampler(ureg, m + 2); 2675 tx->info->sampler_mask |= 1 << (m + 2); 2676 E = tx_scratch(tx); 2677 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); 2678 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W)); 2679 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W)); 2680 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W)); 2681 /* At this step, dst = N = (u', w', z'). 2682 * We want dst to be the texture sampled at (u'', w'', z''), with 2683 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ 2684 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); 2685 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 2686 /* at this step tmp.x = 1/N.N */ 2687 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E)); 2688 /* at this step tmp.y = N.E */ 2689 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 2690 /* at this step tmp.x = N.E/N.N */ 2691 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); 2692 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); 2693 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ 2694 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E))); 2695 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); 2696 break; 2697 default: 2698 return D3DERR_INVALIDCALL; 2699 } 2700 return D3D_OK; 2701 } 2702 2703 DECL_SPECIAL(TEXDEPTH) 2704 { 2705 struct ureg_program *ureg = tx->ureg; 2706 struct ureg_dst r5; 2707 struct ureg_src r5r, r5g; 2708 2709 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */ 2710 2711 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g. 2712 * r5 won't be used afterward, thus we can use r5.ba */ 2713 r5 = tx->regs.r[5]; 2714 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X); 2715 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y); 2716 2717 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g); 2718 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z)); 2719 /* r5.r = r/g */ 2720 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)), 2721 r5r, ureg_imm1f(ureg, 1.0f)); 2722 /* replace the depth for depth testing with the result */ 2723 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, 2724 TGSI_WRITEMASK_Z, 0, 1); 2725 ureg_MOV(ureg, tx->regs.oDepth, r5r); 2726 2727 return D3D_OK; 2728 } 2729 2730 DECL_SPECIAL(BEM) 2731 { 2732 struct ureg_program *ureg = tx->ureg; 2733 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2734 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 2735 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 2736 struct ureg_src m00, m01, m10, m11; 2737 const int m = tx->insn.dst[0].idx; 2738 struct ureg_dst tmp; 2739 /* 2740 * Bump-env-matrix: 2741 * 00 is X 2742 * 01 is Y 2743 * 10 is Z 2744 * 11 is W 2745 */ 2746 nine_info_mark_const_f_used(tx->info, 8 + m); 2747 m00 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, X); 2748 m01 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Y); 2749 m10 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, Z); 2750 m11 = NINE_CONSTANT_SRC_SWIZZLE(8 + m, W); 2751 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */ 2752 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, 2753 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X)); 2754 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */ 2755 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, 2756 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); 2757 2758 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */ 2759 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, 2760 NINE_APPLY_SWIZZLE(src1, X), src0); 2761 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */ 2762 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, 2763 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); 2764 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); 2765 2766 tx->info->bumpenvmat_needed = 1; 2767 2768 return D3D_OK; 2769 } 2770 2771 DECL_SPECIAL(TEXLD) 2772 { 2773 struct ureg_program *ureg = tx->ureg; 2774 unsigned target; 2775 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2776 struct ureg_src src[2] = { 2777 tx_src_param(tx, &tx->insn.src[0]), 2778 tx_src_param(tx, &tx->insn.src[1]) 2779 }; 2780 assert(tx->insn.src[1].idx >= 0 && 2781 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 2782 target = tx->sampler_targets[tx->insn.src[1].idx]; 2783 2784 switch (tx->insn.flags) { 2785 case 0: 2786 ureg_TEX(ureg, dst, target, src[0], src[1]); 2787 break; 2788 case NINED3DSI_TEXLD_PROJECT: 2789 ureg_TXP(ureg, dst, target, src[0], src[1]); 2790 break; 2791 case NINED3DSI_TEXLD_BIAS: 2792 ureg_TXB(ureg, dst, target, src[0], src[1]); 2793 break; 2794 default: 2795 assert(0); 2796 return D3DERR_INVALIDCALL; 2797 } 2798 return D3D_OK; 2799 } 2800 2801 DECL_SPECIAL(TEXLD_14) 2802 { 2803 struct ureg_program *ureg = tx->ureg; 2804 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2805 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2806 const unsigned s = tx->insn.dst[0].idx; 2807 const unsigned t = ps1x_sampler_type(tx->info, s); 2808 2809 tx->info->sampler_mask |= 1 << s; 2810 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s)); 2811 2812 return D3D_OK; 2813 } 2814 2815 DECL_SPECIAL(TEX) 2816 { 2817 struct ureg_program *ureg = tx->ureg; 2818 const unsigned s = tx->insn.dst[0].idx; 2819 const unsigned t = ps1x_sampler_type(tx->info, s); 2820 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2821 struct ureg_src src[2]; 2822 2823 tx_texcoord_alloc(tx, s); 2824 2825 src[0] = tx->regs.vT[s]; 2826 src[1] = ureg_DECL_sampler(ureg, s); 2827 tx->info->sampler_mask |= 1 << s; 2828 2829 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s); 2830 2831 return D3D_OK; 2832 } 2833 2834 DECL_SPECIAL(TEXLDD) 2835 { 2836 unsigned target; 2837 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2838 struct ureg_src src[4] = { 2839 tx_src_param(tx, &tx->insn.src[0]), 2840 tx_src_param(tx, &tx->insn.src[1]), 2841 tx_src_param(tx, &tx->insn.src[2]), 2842 tx_src_param(tx, &tx->insn.src[3]) 2843 }; 2844 assert(tx->insn.src[1].idx >= 0 && 2845 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 2846 target = tx->sampler_targets[tx->insn.src[1].idx]; 2847 2848 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]); 2849 return D3D_OK; 2850 } 2851 2852 DECL_SPECIAL(TEXLDL) 2853 { 2854 unsigned target; 2855 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2856 struct ureg_src src[2] = { 2857 tx_src_param(tx, &tx->insn.src[0]), 2858 tx_src_param(tx, &tx->insn.src[1]) 2859 }; 2860 assert(tx->insn.src[1].idx >= 0 && 2861 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 2862 target = tx->sampler_targets[tx->insn.src[1].idx]; 2863 2864 ureg_TXL(tx->ureg, dst, target, src[0], src[1]); 2865 return D3D_OK; 2866 } 2867 2868 DECL_SPECIAL(SETP) 2869 { 2870 STUB(D3DERR_INVALIDCALL); 2871 } 2872 2873 DECL_SPECIAL(BREAKP) 2874 { 2875 STUB(D3DERR_INVALIDCALL); 2876 } 2877 2878 DECL_SPECIAL(PHASE) 2879 { 2880 return D3D_OK; /* we don't care about phase */ 2881 } 2882 2883 DECL_SPECIAL(COMMENT) 2884 { 2885 return D3D_OK; /* nothing to do */ 2886 } 2887 2888 2889 #define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \ 2890 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h } 2891 2892 struct sm1_op_info inst_table[] = 2893 { 2894 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */ 2895 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), 2896 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */ 2897 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */ 2898 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */ 2899 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */ 2900 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 6 */ 2901 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */ 2902 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */ 2903 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */ 2904 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */ 2905 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */ 2906 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */ 2907 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */ 2908 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */ 2909 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */ 2910 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */ 2911 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */ 2912 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */ 2913 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */ 2914 2915 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)), 2916 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)), 2917 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)), 2918 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)), 2919 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)), 2920 2921 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)), 2922 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)), 2923 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)), 2924 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)), 2925 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)), 2926 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)), 2927 2928 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)), 2929 2930 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)), 2931 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */ 2932 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */ 2933 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)), 2934 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */ 2935 2936 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)), 2937 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)), 2938 2939 /* More flow control */ 2940 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)), 2941 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)), 2942 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)), 2943 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)), 2944 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)), 2945 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)), 2946 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL), 2947 _OPI(BREAKC, BREAKC, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)), 2948 /* we don't write to the address register, but a normal register (copied 2949 * when needed to the address register), thus we don't use ARR */ 2950 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), 2951 2952 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)), 2953 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)), 2954 2955 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)), 2956 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)), 2957 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)), 2958 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)), 2959 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)), 2960 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)), 2961 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), 2962 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), 2963 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)), 2964 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)), 2965 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)), 2966 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)), 2967 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)), 2968 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 2969 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)), 2970 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 2971 2972 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL), 2973 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), 2974 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)), 2975 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)), 2976 2977 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)), 2978 2979 /* More tex stuff */ 2980 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)), 2981 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)), 2982 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)), 2983 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)), 2984 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 2985 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)), 2986 2987 /* Misc */ 2988 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */ 2989 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)), 2990 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)), 2991 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), 2992 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), 2993 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)), 2994 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)), 2995 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)), 2996 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP)) 2997 }; 2998 2999 struct sm1_op_info inst_phase = 3000 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE)); 3001 3002 struct sm1_op_info inst_comment = 3003 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT)); 3004 3005 static void 3006 create_op_info_map(struct shader_translator *tx) 3007 { 3008 const unsigned version = (tx->version.major << 8) | tx->version.minor; 3009 unsigned i; 3010 3011 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i) 3012 tx->op_info_map[i] = -1; 3013 3014 if (tx->processor == PIPE_SHADER_VERTEX) { 3015 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { 3016 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); 3017 if (inst_table[i].vert_version.min <= version && 3018 inst_table[i].vert_version.max >= version) 3019 tx->op_info_map[inst_table[i].sio] = i; 3020 } 3021 } else { 3022 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { 3023 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); 3024 if (inst_table[i].frag_version.min <= version && 3025 inst_table[i].frag_version.max >= version) 3026 tx->op_info_map[inst_table[i].sio] = i; 3027 } 3028 } 3029 } 3030 3031 static inline HRESULT 3032 NineTranslateInstruction_Generic(struct shader_translator *tx) 3033 { 3034 struct ureg_dst dst[1]; 3035 struct ureg_src src[4]; 3036 unsigned i; 3037 3038 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i) 3039 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]); 3040 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i) 3041 src[i] = tx_src_param(tx, &tx->insn.src[i]); 3042 3043 ureg_insn(tx->ureg, tx->insn.info->opcode, 3044 dst, tx->insn.ndst, 3045 src, tx->insn.nsrc); 3046 return D3D_OK; 3047 } 3048 3049 static inline DWORD 3050 TOKEN_PEEK(struct shader_translator *tx) 3051 { 3052 return *(tx->parse); 3053 } 3054 3055 static inline DWORD 3056 TOKEN_NEXT(struct shader_translator *tx) 3057 { 3058 return *(tx->parse)++; 3059 } 3060 3061 static inline void 3062 TOKEN_JUMP(struct shader_translator *tx) 3063 { 3064 if (tx->parse_next && tx->parse != tx->parse_next) { 3065 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next); 3066 tx->parse = tx->parse_next; 3067 } 3068 } 3069 3070 static inline boolean 3071 sm1_parse_eof(struct shader_translator *tx) 3072 { 3073 return TOKEN_PEEK(tx) == NINED3DSP_END; 3074 } 3075 3076 static void 3077 sm1_read_version(struct shader_translator *tx) 3078 { 3079 const DWORD tok = TOKEN_NEXT(tx); 3080 3081 tx->version.major = D3DSHADER_VERSION_MAJOR(tok); 3082 tx->version.minor = D3DSHADER_VERSION_MINOR(tok); 3083 3084 switch (tok >> 16) { 3085 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break; 3086 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break; 3087 default: 3088 DBG("Invalid shader type: %x\n", tok); 3089 tx->processor = ~0; 3090 break; 3091 } 3092 } 3093 3094 /* This is just to check if we parsed the instruction properly. */ 3095 static void 3096 sm1_parse_get_skip(struct shader_translator *tx) 3097 { 3098 const DWORD tok = TOKEN_PEEK(tx); 3099 3100 if (tx->version.major >= 2) { 3101 tx->parse_next = tx->parse + 1 /* this */ + 3102 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT); 3103 } else { 3104 tx->parse_next = NULL; /* TODO: determine from param count */ 3105 } 3106 } 3107 3108 static void 3109 sm1_print_comment(const char *comment, UINT size) 3110 { 3111 if (!size) 3112 return; 3113 /* TODO */ 3114 } 3115 3116 static void 3117 sm1_parse_comments(struct shader_translator *tx, BOOL print) 3118 { 3119 DWORD tok = TOKEN_PEEK(tx); 3120 3121 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT) 3122 { 3123 const char *comment = ""; 3124 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT; 3125 tx->parse += size + 1; 3126 3127 if (print) 3128 sm1_print_comment(comment, size); 3129 3130 tok = TOKEN_PEEK(tx); 3131 } 3132 } 3133 3134 static void 3135 sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel) 3136 { 3137 *reg = TOKEN_NEXT(tx); 3138 3139 if (*reg & D3DSHADER_ADDRMODE_RELATIVE) 3140 { 3141 if (tx->version.major < 2) 3142 *rel = (1 << 31) | 3143 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) | 3144 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) | 3145 D3DSP_NOSWIZZLE; 3146 else 3147 *rel = TOKEN_NEXT(tx); 3148 } 3149 } 3150 3151 static void 3152 sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok) 3153 { 3154 int8_t shift; 3155 dst->file = 3156 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT | 3157 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2; 3158 dst->type = TGSI_RETURN_TYPE_FLOAT; 3159 dst->idx = tok & D3DSP_REGNUM_MASK; 3160 dst->rel = NULL; 3161 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT; 3162 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT; 3163 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT; 3164 dst->shift = (shift & 0x7) - (shift & 0x8); 3165 } 3166 3167 static void 3168 sm1_parse_src_param(struct sm1_src_param *src, DWORD tok) 3169 { 3170 src->file = 3171 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | 3172 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2); 3173 src->type = TGSI_RETURN_TYPE_FLOAT; 3174 src->idx = tok & D3DSP_REGNUM_MASK; 3175 src->rel = NULL; 3176 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT; 3177 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT; 3178 3179 switch (src->file) { 3180 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break; 3181 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break; 3182 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break; 3183 default: 3184 break; 3185 } 3186 } 3187 3188 static void 3189 sm1_parse_immediate(struct shader_translator *tx, 3190 struct sm1_src_param *imm) 3191 { 3192 imm->file = NINED3DSPR_IMMEDIATE; 3193 imm->idx = INT_MIN; 3194 imm->rel = NULL; 3195 imm->swizzle = NINED3DSP_NOSWIZZLE; 3196 imm->mod = 0; 3197 switch (tx->insn.opcode) { 3198 case D3DSIO_DEF: 3199 imm->type = NINED3DSPTYPE_FLOAT4; 3200 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); 3201 tx->parse += 4; 3202 break; 3203 case D3DSIO_DEFI: 3204 imm->type = NINED3DSPTYPE_INT4; 3205 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); 3206 tx->parse += 4; 3207 break; 3208 case D3DSIO_DEFB: 3209 imm->type = NINED3DSPTYPE_BOOL; 3210 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD)); 3211 tx->parse += 1; 3212 break; 3213 default: 3214 assert(0); 3215 break; 3216 } 3217 } 3218 3219 static void 3220 sm1_read_dst_param(struct shader_translator *tx, 3221 struct sm1_dst_param *dst, 3222 struct sm1_src_param *rel) 3223 { 3224 DWORD tok_dst, tok_rel = 0; 3225 3226 sm1_parse_get_param(tx, &tok_dst, &tok_rel); 3227 sm1_parse_dst_param(dst, tok_dst); 3228 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) { 3229 sm1_parse_src_param(rel, tok_rel); 3230 dst->rel = rel; 3231 } 3232 } 3233 3234 static void 3235 sm1_read_src_param(struct shader_translator *tx, 3236 struct sm1_src_param *src, 3237 struct sm1_src_param *rel) 3238 { 3239 DWORD tok_src, tok_rel = 0; 3240 3241 sm1_parse_get_param(tx, &tok_src, &tok_rel); 3242 sm1_parse_src_param(src, tok_src); 3243 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) { 3244 assert(rel); 3245 sm1_parse_src_param(rel, tok_rel); 3246 src->rel = rel; 3247 } 3248 } 3249 3250 static void 3251 sm1_read_semantic(struct shader_translator *tx, 3252 struct sm1_semantic *sem) 3253 { 3254 const DWORD tok_usg = TOKEN_NEXT(tx); 3255 const DWORD tok_dst = TOKEN_NEXT(tx); 3256 3257 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT; 3258 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT; 3259 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT; 3260 3261 sm1_parse_dst_param(&sem->reg, tok_dst); 3262 } 3263 3264 static void 3265 sm1_parse_instruction(struct shader_translator *tx) 3266 { 3267 struct sm1_instruction *insn = &tx->insn; 3268 HRESULT hr; 3269 DWORD tok; 3270 struct sm1_op_info *info = NULL; 3271 unsigned i; 3272 3273 sm1_parse_comments(tx, TRUE); 3274 sm1_parse_get_skip(tx); 3275 3276 tok = TOKEN_NEXT(tx); 3277 3278 insn->opcode = tok & D3DSI_OPCODE_MASK; 3279 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT; 3280 insn->coissue = !!(tok & D3DSI_COISSUE); 3281 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED); 3282 3283 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) { 3284 int k = tx->op_info_map[insn->opcode]; 3285 if (k >= 0) { 3286 assert(k < ARRAY_SIZE(inst_table)); 3287 info = &inst_table[k]; 3288 } 3289 } else { 3290 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase; 3291 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment; 3292 } 3293 if (!info) { 3294 DBG("illegal or unhandled opcode: %08x\n", insn->opcode); 3295 TOKEN_JUMP(tx); 3296 return; 3297 } 3298 insn->info = info; 3299 insn->ndst = info->ndst; 3300 insn->nsrc = info->nsrc; 3301 3302 assert(!insn->predicated && "TODO: predicated instructions"); 3303 3304 /* check version */ 3305 { 3306 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min; 3307 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max; 3308 unsigned ver = (tx->version.major << 8) | tx->version.minor; 3309 if (ver < min || ver > max) { 3310 DBG("opcode not supported in this shader version: %x <= %x <= %x\n", 3311 min, ver, max); 3312 return; 3313 } 3314 } 3315 3316 for (i = 0; i < insn->ndst; ++i) 3317 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]); 3318 if (insn->predicated) 3319 sm1_read_src_param(tx, &insn->pred, NULL); 3320 for (i = 0; i < insn->nsrc; ++i) 3321 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]); 3322 3323 /* parse here so we can dump them before processing */ 3324 if (insn->opcode == D3DSIO_DEF || 3325 insn->opcode == D3DSIO_DEFI || 3326 insn->opcode == D3DSIO_DEFB) 3327 sm1_parse_immediate(tx, &tx->insn.src[0]); 3328 3329 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth); 3330 sm1_instruction_check(insn); 3331 3332 if (info->handler) 3333 hr = info->handler(tx); 3334 else 3335 hr = NineTranslateInstruction_Generic(tx); 3336 tx_apply_dst0_modifiers(tx); 3337 3338 if (hr != D3D_OK) 3339 tx->failure = TRUE; 3340 tx->num_scratch = 0; /* reset */ 3341 3342 TOKEN_JUMP(tx); 3343 } 3344 3345 static void 3346 tx_ctor(struct shader_translator *tx, struct nine_shader_info *info) 3347 { 3348 unsigned i; 3349 3350 tx->info = info; 3351 3352 tx->byte_code = info->byte_code; 3353 tx->parse = info->byte_code; 3354 3355 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i) 3356 info->input_map[i] = NINE_DECLUSAGE_NONE; 3357 info->num_inputs = 0; 3358 3359 info->position_t = FALSE; 3360 info->point_size = FALSE; 3361 3362 tx->info->const_float_slots = 0; 3363 tx->info->const_int_slots = 0; 3364 tx->info->const_bool_slots = 0; 3365 3366 info->sampler_mask = 0x0; 3367 info->rt_mask = 0x0; 3368 3369 info->lconstf.data = NULL; 3370 info->lconstf.ranges = NULL; 3371 3372 info->bumpenvmat_needed = 0; 3373 3374 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) { 3375 tx->regs.rL[i] = ureg_dst_undef(); 3376 } 3377 tx->regs.address = ureg_dst_undef(); 3378 tx->regs.a0 = ureg_dst_undef(); 3379 tx->regs.p = ureg_dst_undef(); 3380 tx->regs.oDepth = ureg_dst_undef(); 3381 tx->regs.vPos = ureg_src_undef(); 3382 tx->regs.vFace = ureg_src_undef(); 3383 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i) 3384 tx->regs.o[i] = ureg_dst_undef(); 3385 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i) 3386 tx->regs.oCol[i] = ureg_dst_undef(); 3387 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i) 3388 tx->regs.vC[i] = ureg_src_undef(); 3389 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i) 3390 tx->regs.vT[i] = ureg_src_undef(); 3391 3392 sm1_read_version(tx); 3393 3394 info->version = (tx->version.major << 4) | tx->version.minor; 3395 3396 tx->num_outputs = 0; 3397 3398 create_op_info_map(tx); 3399 } 3400 3401 static void 3402 tx_dtor(struct shader_translator *tx) 3403 { 3404 if (tx->num_inst_labels) 3405 FREE(tx->inst_labels); 3406 FREE(tx->lconstf); 3407 FREE(tx->regs.r); 3408 FREE(tx); 3409 } 3410 3411 /* CONST[0].xyz = width/2, -height/2, zmax-zmin 3412 * CONST[1].xyz = x+width/2, y+height/2, zmin */ 3413 static void 3414 shader_add_vs_viewport_transform(struct shader_translator *tx) 3415 { 3416 struct ureg_program *ureg = tx->ureg; 3417 struct ureg_src c0 = NINE_CONSTANT_SRC(0); 3418 struct ureg_src c1 = NINE_CONSTANT_SRC(1); 3419 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/ 3420 3421 c0 = ureg_src_dimension(c0, 4); 3422 c1 = ureg_src_dimension(c1, 4); 3423 /* TODO: find out when we need to apply the viewport transformation or not. 3424 * Likely will be XYZ vs XYZRHW in vdecl_out 3425 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0); 3426 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1); 3427 */ 3428 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos)); 3429 } 3430 3431 static void 3432 shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) 3433 { 3434 struct ureg_program *ureg = tx->ureg; 3435 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 3436 struct ureg_src fog_end, fog_coeff, fog_density; 3437 struct ureg_src fog_vs, depth, fog_color; 3438 struct ureg_dst fog_factor; 3439 3440 if (!tx->info->fog_enable) { 3441 ureg_MOV(ureg, oCol0, src_col); 3442 return; 3443 } 3444 3445 if (tx->info->fog_mode != D3DFOG_NONE) { 3446 depth = nine_get_position_input(tx); 3447 depth = ureg_scalar(depth, TGSI_SWIZZLE_Z); 3448 } 3449 3450 nine_info_mark_const_f_used(tx->info, 33); 3451 fog_color = NINE_CONSTANT_SRC(32); 3452 fog_factor = tx_scratch_scalar(tx); 3453 3454 if (tx->info->fog_mode == D3DFOG_LINEAR) { 3455 fog_end = NINE_CONSTANT_SRC_SWIZZLE(33, X); 3456 fog_coeff = NINE_CONSTANT_SRC_SWIZZLE(33, Y); 3457 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(depth)); 3458 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff); 3459 } else if (tx->info->fog_mode == D3DFOG_EXP) { 3460 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X); 3461 ureg_MUL(ureg, fog_factor, depth, fog_density); 3462 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); 3463 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); 3464 } else if (tx->info->fog_mode == D3DFOG_EXP2) { 3465 fog_density = NINE_CONSTANT_SRC_SWIZZLE(33, X); 3466 ureg_MUL(ureg, fog_factor, depth, fog_density); 3467 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor)); 3468 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); 3469 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); 3470 } else { 3471 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, 3472 TGSI_INTERPOLATE_PERSPECTIVE), 3473 TGSI_SWIZZLE_X); 3474 ureg_MOV(ureg, fog_factor, fog_vs); 3475 } 3476 3477 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ), 3478 tx_src_scalar(fog_factor), src_col, fog_color); 3479 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col); 3480 } 3481 3482 #define GET_CAP(n) screen->get_param( \ 3483 screen, PIPE_CAP_##n) 3484 #define GET_SHADER_CAP(n) screen->get_shader_param( \ 3485 screen, info->type, PIPE_SHADER_CAP_##n) 3486 3487 HRESULT 3488 nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe) 3489 { 3490 struct shader_translator *tx; 3491 HRESULT hr = D3D_OK; 3492 const unsigned processor = info->type; 3493 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen; 3494 3495 user_assert(processor != ~0, D3DERR_INVALIDCALL); 3496 3497 tx = CALLOC_STRUCT(shader_translator); 3498 if (!tx) 3499 return E_OUTOFMEMORY; 3500 tx_ctor(tx, info); 3501 3502 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) { 3503 hr = D3DERR_INVALIDCALL; 3504 DBG("Unsupported shader version: %u.%u !\n", 3505 tx->version.major, tx->version.minor); 3506 goto out; 3507 } 3508 if (tx->processor != processor) { 3509 hr = D3DERR_INVALIDCALL; 3510 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor); 3511 goto out; 3512 } 3513 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS", 3514 tx->version.major, tx->version.minor); 3515 3516 tx->ureg = ureg_create(processor); 3517 if (!tx->ureg) { 3518 hr = E_OUTOFMEMORY; 3519 goto out; 3520 } 3521 3522 tx->native_integers = GET_SHADER_CAP(INTEGERS); 3523 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES); 3524 tx->lower_preds = !GET_SHADER_CAP(MAX_PREDS); 3525 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD); 3526 tx->shift_wpos = !GET_CAP(TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 3527 tx->texcoord_sn = tx->want_texcoord ? 3528 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; 3529 tx->wpos_is_sysval = GET_CAP(TGSI_FS_POSITION_IS_SYSVAL); 3530 tx->face_is_sysval_integer = GET_CAP(TGSI_FS_FACE_IS_INTEGER_SYSVAL); 3531 3532 if (IS_VS) { 3533 tx->num_constf_allowed = NINE_MAX_CONST_F; 3534 } else if (tx->version.major < 2) {/* IS_PS v1 */ 3535 tx->num_constf_allowed = 8; 3536 } else if (tx->version.major == 2) {/* IS_PS v2 */ 3537 tx->num_constf_allowed = 32; 3538 } else {/* IS_PS v3 */ 3539 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3; 3540 } 3541 3542 if (tx->version.major < 2) { 3543 tx->num_consti_allowed = 0; 3544 tx->num_constb_allowed = 0; 3545 } else { 3546 tx->num_consti_allowed = NINE_MAX_CONST_I; 3547 tx->num_constb_allowed = NINE_MAX_CONST_B; 3548 } 3549 3550 if (IS_VS && tx->version.major >= 2 && info->swvp_on) { 3551 tx->num_constf_allowed = 8192; 3552 tx->num_consti_allowed = 2048; 3553 tx->num_constb_allowed = 2048; 3554 } 3555 3556 /* VS must always write position. Declare it here to make it the 1st output. 3557 * (Some drivers like nv50 are buggy and rely on that.) 3558 */ 3559 if (IS_VS) { 3560 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); 3561 } else { 3562 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT); 3563 if (!tx->shift_wpos) 3564 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 3565 } 3566 3567 while (!sm1_parse_eof(tx) && !tx->failure) 3568 sm1_parse_instruction(tx); 3569 tx->parse++; /* for byte_size */ 3570 3571 if (tx->failure) { 3572 /* For VS shaders, we print the warning later, 3573 * we first try with swvp. */ 3574 if (IS_PS) 3575 ERR("Encountered buggy shader\n"); 3576 ureg_destroy(tx->ureg); 3577 hr = D3DERR_INVALIDCALL; 3578 goto out; 3579 } 3580 3581 if (IS_PS && tx->version.major < 3) { 3582 if (tx->version.major < 2) { 3583 assert(tx->num_temp); /* there must be color output */ 3584 info->rt_mask |= 0x1; 3585 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0])); 3586 } else { 3587 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0])); 3588 } 3589 } 3590 3591 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) { 3592 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_FOG, 0); 3593 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f)); 3594 } 3595 3596 if (info->position_t) 3597 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); 3598 3599 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) { 3600 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0); 3601 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min)); 3602 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max)); 3603 info->point_size = TRUE; 3604 } 3605 3606 if (info->process_vertices) 3607 shader_add_vs_viewport_transform(tx); 3608 3609 ureg_END(tx->ureg); 3610 3611 /* record local constants */ 3612 if (tx->num_lconstf && tx->indirect_const_access) { 3613 struct nine_range *ranges; 3614 float *data; 3615 int *indices; 3616 unsigned i, k, n; 3617 3618 hr = E_OUTOFMEMORY; 3619 3620 data = MALLOC(tx->num_lconstf * 4 * sizeof(float)); 3621 if (!data) 3622 goto out; 3623 info->lconstf.data = data; 3624 3625 indices = MALLOC(tx->num_lconstf * sizeof(indices[0])); 3626 if (!indices) 3627 goto out; 3628 3629 /* lazy sort, num_lconstf should be small */ 3630 for (n = 0; n < tx->num_lconstf; ++n) { 3631 for (k = 0, i = 0; i < tx->num_lconstf; ++i) { 3632 if (tx->lconstf[i].idx < tx->lconstf[k].idx) 3633 k = i; 3634 } 3635 indices[n] = tx->lconstf[k].idx; 3636 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float)); 3637 tx->lconstf[k].idx = INT_MAX; 3638 } 3639 3640 /* count ranges */ 3641 for (n = 1, i = 1; i < tx->num_lconstf; ++i) 3642 if (indices[i] != indices[i - 1] + 1) 3643 ++n; 3644 ranges = MALLOC(n * sizeof(ranges[0])); 3645 if (!ranges) { 3646 FREE(indices); 3647 goto out; 3648 } 3649 info->lconstf.ranges = ranges; 3650 3651 k = 0; 3652 ranges[k].bgn = indices[0]; 3653 for (i = 1; i < tx->num_lconstf; ++i) { 3654 if (indices[i] != indices[i - 1] + 1) { 3655 ranges[k].next = &ranges[k + 1]; 3656 ranges[k].end = indices[i - 1] + 1; 3657 ++k; 3658 ranges[k].bgn = indices[i]; 3659 } 3660 } 3661 ranges[k].end = indices[i - 1] + 1; 3662 ranges[k].next = NULL; 3663 assert(n == (k + 1)); 3664 3665 FREE(indices); 3666 hr = D3D_OK; 3667 } 3668 3669 /* r500 */ 3670 if (info->const_float_slots > device->max_vs_const_f && 3671 (info->const_int_slots || info->const_bool_slots) && 3672 (!IS_VS || !info->swvp_on)) 3673 ERR("Overlapping constant slots. The shader is likely to be buggy\n"); 3674 3675 3676 if (tx->indirect_const_access) /* vs only */ 3677 info->const_float_slots = device->max_vs_const_f; 3678 3679 if (!IS_VS || !info->swvp_on) { 3680 unsigned s, slot_max; 3681 unsigned max_const_f = IS_VS ? device->max_vs_const_f : device->max_ps_const_f; 3682 3683 slot_max = info->const_bool_slots > 0 ? 3684 max_const_f + NINE_MAX_CONST_I 3685 + DIV_ROUND_UP(info->const_bool_slots, 4) : 3686 info->const_int_slots > 0 ? 3687 max_const_f + info->const_int_slots : 3688 info->const_float_slots; 3689 3690 info->const_used_size = sizeof(float[4]) * slot_max; /* slots start from 1 */ 3691 3692 for (s = 0; s < slot_max; s++) 3693 ureg_DECL_constant(tx->ureg, s); 3694 } else { 3695 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0); 3696 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1); 3697 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2); 3698 ureg_DECL_constant2D(tx->ureg, 0, 511, 3); 3699 } 3700 3701 if (info->process_vertices) 3702 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */ 3703 3704 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) { 3705 unsigned count; 3706 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, &count); 3707 tgsi_dump(toks, 0); 3708 ureg_free_tokens(toks); 3709 } 3710 3711 if (info->process_vertices) { 3712 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out, 3713 tx->output_info, 3714 tx->num_outputs, 3715 &(info->so)); 3716 info->cso = ureg_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so)); 3717 } else 3718 info->cso = ureg_create_shader_and_destroy(tx->ureg, pipe); 3719 if (!info->cso) { 3720 hr = D3DERR_DRIVERINTERNALERROR; 3721 FREE(info->lconstf.data); 3722 FREE(info->lconstf.ranges); 3723 goto out; 3724 } 3725 3726 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD); 3727 out: 3728 tx_dtor(tx); 3729 return hr; 3730 } 3731