1 /********************************************************** 2 * Copyright 2008-2009 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26 27 #include "pipe/p_shader_tokens.h" 28 #include "tgsi/tgsi_dump.h" 29 #include "tgsi/tgsi_parse.h" 30 #include "util/u_memory.h" 31 #include "util/u_math.h" 32 #include "util/u_pstipple.h" 33 34 #include "svga_tgsi_emit.h" 35 #include "svga_context.h" 36 37 38 static boolean emit_vs_postamble( struct svga_shader_emitter *emit ); 39 static boolean emit_ps_postamble( struct svga_shader_emitter *emit ); 40 41 42 static unsigned 43 translate_opcode(uint opcode) 44 { 45 switch (opcode) { 46 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; 47 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; 48 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; 49 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; 50 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; 51 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; 52 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX; 53 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; 54 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV; 55 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; 56 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; 57 default: 58 assert(!"svga: unexpected opcode in translate_opcode()"); 59 return SVGA3DOP_LAST_INST; 60 } 61 } 62 63 64 static unsigned 65 translate_file(unsigned file) 66 { 67 switch (file) { 68 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP; 69 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT; 70 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */ 71 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST; 72 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST; 73 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER; 74 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR; 75 default: 76 assert(!"svga: unexpected register file in translate_file()"); 77 return SVGA3DREG_TEMP; 78 } 79 } 80 81 82 /** 83 * Translate a TGSI destination register to an SVGA3DShaderDestToken. 84 * \param insn the TGSI instruction 85 * \param idx which TGSI dest register to translate (usually (always?) 
zero) 86 */ 87 static SVGA3dShaderDestToken 88 translate_dst_register( struct svga_shader_emitter *emit, 89 const struct tgsi_full_instruction *insn, 90 unsigned idx ) 91 { 92 const struct tgsi_full_dst_register *reg = &insn->Dst[idx]; 93 SVGA3dShaderDestToken dest; 94 95 switch (reg->Register.File) { 96 case TGSI_FILE_OUTPUT: 97 /* Output registers encode semantic information in their name. 98 * Need to lookup a table built at decl time: 99 */ 100 dest = emit->output_map[reg->Register.Index]; 101 emit->num_output_writes++; 102 break; 103 104 default: 105 { 106 unsigned index = reg->Register.Index; 107 assert(index < SVGA3D_TEMPREG_MAX); 108 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1); 109 dest = dst_register(translate_file(reg->Register.File), index); 110 } 111 break; 112 } 113 114 if (reg->Register.Indirect) { 115 debug_warning("Indirect indexing of dest registers is not supported!\n"); 116 } 117 118 dest.mask = reg->Register.WriteMask; 119 assert(dest.mask); 120 121 if (insn->Instruction.Saturate) 122 dest.dstMod = SVGA3DDSTMOD_SATURATE; 123 124 return dest; 125 } 126 127 128 /** 129 * Apply a swizzle to a src_register, returning a new src_register 130 * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y) 131 * would return SRC.YYZZ 132 */ 133 static struct src_register 134 swizzle(struct src_register src, 135 unsigned x, unsigned y, unsigned z, unsigned w) 136 { 137 assert(x < 4); 138 assert(y < 4); 139 assert(z < 4); 140 assert(w < 4); 141 x = (src.base.swizzle >> (x * 2)) & 0x3; 142 y = (src.base.swizzle >> (y * 2)) & 0x3; 143 z = (src.base.swizzle >> (z * 2)) & 0x3; 144 w = (src.base.swizzle >> (w * 2)) & 0x3; 145 146 src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w); 147 148 return src; 149 } 150 151 152 /** 153 * Apply a "scalar" swizzle to a src_register returning a new 154 * src_register where all the swizzle terms are the same. 155 * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ 156 */ 157 static struct src_register 158 scalar(struct src_register src, unsigned comp) 159 { 160 assert(comp < 4); 161 return swizzle( src, comp, comp, comp, comp ); 162 } 163 164 165 static boolean 166 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) 167 { 168 unsigned i; 169 170 for (i = 0; i < emit->num_arl_consts; ++i) { 171 if (emit->arl_consts[i].arl_num == emit->current_arl) 172 return TRUE; 173 } 174 return FALSE; 175 } 176 177 178 static int 179 svga_arl_adjustment( const struct svga_shader_emitter *emit ) 180 { 181 unsigned i; 182 183 for (i = 0; i < emit->num_arl_consts; ++i) { 184 if (emit->arl_consts[i].arl_num == emit->current_arl) 185 return emit->arl_consts[i].number; 186 } 187 return 0; 188 } 189 190 191 /** 192 * Translate a TGSI src register to a src_register. 193 */ 194 static struct src_register 195 translate_src_register( const struct svga_shader_emitter *emit, 196 const struct tgsi_full_src_register *reg ) 197 { 198 struct src_register src; 199 200 switch (reg->Register.File) { 201 case TGSI_FILE_INPUT: 202 /* Input registers are referred to by their semantic name rather 203 * than by index. Use the mapping build up from the decls: 204 */ 205 src = emit->input_map[reg->Register.Index]; 206 break; 207 208 case TGSI_FILE_IMMEDIATE: 209 /* Immediates are appended after TGSI constants in the D3D 210 * constant buffer. 
211 */ 212 src = src_register( translate_file( reg->Register.File ), 213 reg->Register.Index + emit->imm_start ); 214 break; 215 216 default: 217 src = src_register( translate_file( reg->Register.File ), 218 reg->Register.Index ); 219 break; 220 } 221 222 /* Indirect addressing. 223 */ 224 if (reg->Register.Indirect) { 225 if (emit->unit == PIPE_SHADER_FRAGMENT) { 226 /* Pixel shaders have only loop registers for relative 227 * addressing into inputs. Ignore the redundant address 228 * register, the contents of aL should be in sync with it. 229 */ 230 if (reg->Register.File == TGSI_FILE_INPUT) { 231 src.base.relAddr = 1; 232 src.indirect = src_token(SVGA3DREG_LOOP, 0); 233 } 234 } 235 else { 236 /* Constant buffers only. 237 */ 238 if (reg->Register.File == TGSI_FILE_CONSTANT) { 239 /* we shift the offset towards the minimum */ 240 if (svga_arl_needs_adjustment( emit )) { 241 src.base.num -= svga_arl_adjustment( emit ); 242 } 243 src.base.relAddr = 1; 244 245 /* Not really sure what should go in the second token: 246 */ 247 src.indirect = src_token( SVGA3DREG_ADDR, 248 reg->Indirect.Index ); 249 250 src.indirect.swizzle = SWIZZLE_XXXX; 251 } 252 } 253 } 254 255 src = swizzle( src, 256 reg->Register.SwizzleX, 257 reg->Register.SwizzleY, 258 reg->Register.SwizzleZ, 259 reg->Register.SwizzleW ); 260 261 /* src.mod isn't a bitfield, unfortunately: 262 * See tgsi_util_get_full_src_register_sign_mode for implementation details. 263 */ 264 if (reg->Register.Absolute) { 265 if (reg->Register.Negate) 266 src.base.srcMod = SVGA3DSRCMOD_ABSNEG; 267 else 268 src.base.srcMod = SVGA3DSRCMOD_ABS; 269 } 270 else { 271 if (reg->Register.Negate) 272 src.base.srcMod = SVGA3DSRCMOD_NEG; 273 else 274 src.base.srcMod = SVGA3DSRCMOD_NONE; 275 } 276 277 return src; 278 } 279 280 281 /* 282 * Get a temporary register. 283 * Note: if we exceed the temporary register limit we just use 284 * register SVGA3D_TEMPREG_MAX - 1. 285 */ 286 static SVGA3dShaderDestToken 287 get_temp( struct svga_shader_emitter *emit ) 288 { 289 int i = emit->nr_hw_temp + emit->internal_temp_count++; 290 if (i >= SVGA3D_TEMPREG_MAX) { 291 debug_warn_once("svga: Too many temporary registers used in shader\n"); 292 i = SVGA3D_TEMPREG_MAX - 1; 293 } 294 return dst_register( SVGA3DREG_TEMP, i ); 295 } 296 297 298 /** 299 * Release a single temp. Currently only effective if it was the last 300 * allocated temp, otherwise release will be delayed until the next 301 * call to reset_temp_regs(). 302 */ 303 static void 304 release_temp( struct svga_shader_emitter *emit, 305 SVGA3dShaderDestToken temp ) 306 { 307 if (temp.num == emit->internal_temp_count - 1) 308 emit->internal_temp_count--; 309 } 310 311 312 /** 313 * Release all temps. 
314 */ 315 static void 316 reset_temp_regs(struct svga_shader_emitter *emit) 317 { 318 emit->internal_temp_count = 0; 319 } 320 321 322 /** Emit bytecode for a src_register */ 323 static boolean 324 emit_src(struct svga_shader_emitter *emit, const struct src_register src) 325 { 326 if (src.base.relAddr) { 327 assert(src.base.reserved0); 328 assert(src.indirect.reserved0); 329 return (svga_shader_emit_dword( emit, src.base.value ) && 330 svga_shader_emit_dword( emit, src.indirect.value )); 331 } 332 else { 333 assert(src.base.reserved0); 334 return svga_shader_emit_dword( emit, src.base.value ); 335 } 336 } 337 338 339 /** Emit bytecode for a dst_register */ 340 static boolean 341 emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest) 342 { 343 assert(dest.reserved0); 344 assert(dest.mask); 345 return svga_shader_emit_dword( emit, dest.value ); 346 } 347 348 349 /** Emit bytecode for a 1-operand instruction */ 350 static boolean 351 emit_op1(struct svga_shader_emitter *emit, 352 SVGA3dShaderInstToken inst, 353 SVGA3dShaderDestToken dest, 354 struct src_register src0) 355 { 356 return (emit_instruction(emit, inst) && 357 emit_dst(emit, dest) && 358 emit_src(emit, src0)); 359 } 360 361 362 /** Emit bytecode for a 2-operand instruction */ 363 static boolean 364 emit_op2(struct svga_shader_emitter *emit, 365 SVGA3dShaderInstToken inst, 366 SVGA3dShaderDestToken dest, 367 struct src_register src0, 368 struct src_register src1) 369 { 370 return (emit_instruction(emit, inst) && 371 emit_dst(emit, dest) && 372 emit_src(emit, src0) && 373 emit_src(emit, src1)); 374 } 375 376 377 /** Emit bytecode for a 3-operand instruction */ 378 static boolean 379 emit_op3(struct svga_shader_emitter *emit, 380 SVGA3dShaderInstToken inst, 381 SVGA3dShaderDestToken dest, 382 struct src_register src0, 383 struct src_register src1, 384 struct src_register src2) 385 { 386 return (emit_instruction(emit, inst) && 387 emit_dst(emit, dest) && 388 emit_src(emit, src0) && 389 emit_src(emit, src1) && 390 emit_src(emit, src2)); 391 } 392 393 394 /** Emit bytecode for a 4-operand instruction */ 395 static boolean 396 emit_op4(struct svga_shader_emitter *emit, 397 SVGA3dShaderInstToken inst, 398 SVGA3dShaderDestToken dest, 399 struct src_register src0, 400 struct src_register src1, 401 struct src_register src2, 402 struct src_register src3) 403 { 404 return (emit_instruction(emit, inst) && 405 emit_dst(emit, dest) && 406 emit_src(emit, src0) && 407 emit_src(emit, src1) && 408 emit_src(emit, src2) && 409 emit_src(emit, src3)); 410 } 411 412 413 /** 414 * Apply the absolute value modifier to the given src_register, returning 415 * a new src_register. 416 */ 417 static struct src_register 418 absolute(struct src_register src) 419 { 420 src.base.srcMod = SVGA3DSRCMOD_ABS; 421 return src; 422 } 423 424 425 /** 426 * Apply the negation modifier to the given src_register, returning 427 * a new src_register. 
428 */ 429 static struct src_register 430 negate(struct src_register src) 431 { 432 switch (src.base.srcMod) { 433 case SVGA3DSRCMOD_ABS: 434 src.base.srcMod = SVGA3DSRCMOD_ABSNEG; 435 break; 436 case SVGA3DSRCMOD_ABSNEG: 437 src.base.srcMod = SVGA3DSRCMOD_ABS; 438 break; 439 case SVGA3DSRCMOD_NEG: 440 src.base.srcMod = SVGA3DSRCMOD_NONE; 441 break; 442 case SVGA3DSRCMOD_NONE: 443 src.base.srcMod = SVGA3DSRCMOD_NEG; 444 break; 445 } 446 return src; 447 } 448 449 450 451 /* Replace the src with the temporary specified in the dst, but copying 452 * only the necessary channels, and preserving the original swizzle (which is 453 * important given that several opcodes have constraints in the allowed 454 * swizzles). 455 */ 456 static boolean 457 emit_repl(struct svga_shader_emitter *emit, 458 SVGA3dShaderDestToken dst, 459 struct src_register *src0) 460 { 461 unsigned src0_swizzle; 462 unsigned chan; 463 464 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP); 465 466 src0_swizzle = src0->base.swizzle; 467 468 dst.mask = 0; 469 for (chan = 0; chan < 4; ++chan) { 470 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3; 471 dst.mask |= 1 << swizzle; 472 } 473 assert(dst.mask); 474 475 src0->base.swizzle = SVGA3DSWIZZLE_NONE; 476 477 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 )) 478 return FALSE; 479 480 *src0 = src( dst ); 481 src0->base.swizzle = src0_swizzle; 482 483 return TRUE; 484 } 485 486 487 /** 488 * Submit/emit an instruction with zero operands. 489 */ 490 static boolean 491 submit_op0(struct svga_shader_emitter *emit, 492 SVGA3dShaderInstToken inst, 493 SVGA3dShaderDestToken dest) 494 { 495 return (emit_instruction( emit, inst ) && 496 emit_dst( emit, dest )); 497 } 498 499 500 /** 501 * Submit/emit an instruction with one operand. 502 */ 503 static boolean 504 submit_op1(struct svga_shader_emitter *emit, 505 SVGA3dShaderInstToken inst, 506 SVGA3dShaderDestToken dest, 507 struct src_register src0) 508 { 509 return emit_op1( emit, inst, dest, src0 ); 510 } 511 512 513 /** 514 * Submit/emit an instruction with two operands. 515 * 516 * SVGA shaders may not refer to >1 constant register in a single 517 * instruction. This function checks for that usage and inserts a 518 * move to temporary if detected. 519 * 520 * The same applies to input registers -- at most a single input 521 * register may be read by any instruction. 522 */ 523 static boolean 524 submit_op2(struct svga_shader_emitter *emit, 525 SVGA3dShaderInstToken inst, 526 SVGA3dShaderDestToken dest, 527 struct src_register src0, 528 struct src_register src1) 529 { 530 SVGA3dShaderDestToken temp; 531 SVGA3dShaderRegType type0, type1; 532 boolean need_temp = FALSE; 533 534 temp.value = 0; 535 type0 = SVGA3dShaderGetRegType( src0.base.value ); 536 type1 = SVGA3dShaderGetRegType( src1.base.value ); 537 538 if (type0 == SVGA3DREG_CONST && 539 type1 == SVGA3DREG_CONST && 540 src0.base.num != src1.base.num) 541 need_temp = TRUE; 542 543 if (type0 == SVGA3DREG_INPUT && 544 type1 == SVGA3DREG_INPUT && 545 src0.base.num != src1.base.num) 546 need_temp = TRUE; 547 548 if (need_temp) { 549 temp = get_temp( emit ); 550 551 if (!emit_repl( emit, temp, &src0 )) 552 return FALSE; 553 } 554 555 if (!emit_op2( emit, inst, dest, src0, src1 )) 556 return FALSE; 557 558 if (need_temp) 559 release_temp( emit, temp ); 560 561 return TRUE; 562 } 563 564 565 /** 566 * Submit/emit an instruction with three operands. 567 * 568 * SVGA shaders may not refer to >1 constant register in a single 569 * instruction. 
This function checks for that usage and inserts a 570 * move to temporary if detected. 571 */ 572 static boolean 573 submit_op3(struct svga_shader_emitter *emit, 574 SVGA3dShaderInstToken inst, 575 SVGA3dShaderDestToken dest, 576 struct src_register src0, 577 struct src_register src1, 578 struct src_register src2) 579 { 580 SVGA3dShaderDestToken temp0; 581 SVGA3dShaderDestToken temp1; 582 boolean need_temp0 = FALSE; 583 boolean need_temp1 = FALSE; 584 SVGA3dShaderRegType type0, type1, type2; 585 586 temp0.value = 0; 587 temp1.value = 0; 588 type0 = SVGA3dShaderGetRegType( src0.base.value ); 589 type1 = SVGA3dShaderGetRegType( src1.base.value ); 590 type2 = SVGA3dShaderGetRegType( src2.base.value ); 591 592 if (inst.op != SVGA3DOP_SINCOS) { 593 if (type0 == SVGA3DREG_CONST && 594 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) || 595 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) 596 need_temp0 = TRUE; 597 598 if (type1 == SVGA3DREG_CONST && 599 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num)) 600 need_temp1 = TRUE; 601 } 602 603 if (type0 == SVGA3DREG_INPUT && 604 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) || 605 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) 606 need_temp0 = TRUE; 607 608 if (type1 == SVGA3DREG_INPUT && 609 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num)) 610 need_temp1 = TRUE; 611 612 if (need_temp0) { 613 temp0 = get_temp( emit ); 614 615 if (!emit_repl( emit, temp0, &src0 )) 616 return FALSE; 617 } 618 619 if (need_temp1) { 620 temp1 = get_temp( emit ); 621 622 if (!emit_repl( emit, temp1, &src1 )) 623 return FALSE; 624 } 625 626 if (!emit_op3( emit, inst, dest, src0, src1, src2 )) 627 return FALSE; 628 629 if (need_temp1) 630 release_temp( emit, temp1 ); 631 if (need_temp0) 632 release_temp( emit, temp0 ); 633 return TRUE; 634 } 635 636 637 /** 638 * Submit/emit an instruction with four operands. 639 * 640 * SVGA shaders may not refer to >1 constant register in a single 641 * instruction. This function checks for that usage and inserts a 642 * move to temporary if detected. 
643 */ 644 static boolean 645 submit_op4(struct svga_shader_emitter *emit, 646 SVGA3dShaderInstToken inst, 647 SVGA3dShaderDestToken dest, 648 struct src_register src0, 649 struct src_register src1, 650 struct src_register src2, 651 struct src_register src3) 652 { 653 SVGA3dShaderDestToken temp0; 654 SVGA3dShaderDestToken temp3; 655 boolean need_temp0 = FALSE; 656 boolean need_temp3 = FALSE; 657 SVGA3dShaderRegType type0, type1, type2, type3; 658 659 temp0.value = 0; 660 temp3.value = 0; 661 type0 = SVGA3dShaderGetRegType( src0.base.value ); 662 type1 = SVGA3dShaderGetRegType( src1.base.value ); 663 type2 = SVGA3dShaderGetRegType( src2.base.value ); 664 type3 = SVGA3dShaderGetRegType( src2.base.value ); 665 666 /* Make life a little easier - this is only used by the TXD 667 * instruction which is guaranteed not to have a constant/input reg 668 * in one slot at least: 669 */ 670 assert(type1 == SVGA3DREG_SAMPLER); 671 (void) type1; 672 673 if (type0 == SVGA3DREG_CONST && 674 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) || 675 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) 676 need_temp0 = TRUE; 677 678 if (type3 == SVGA3DREG_CONST && 679 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num)) 680 need_temp3 = TRUE; 681 682 if (type0 == SVGA3DREG_INPUT && 683 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) || 684 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) 685 need_temp0 = TRUE; 686 687 if (type3 == SVGA3DREG_INPUT && 688 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) 689 need_temp3 = TRUE; 690 691 if (need_temp0) { 692 temp0 = get_temp( emit ); 693 694 if (!emit_repl( emit, temp0, &src0 )) 695 return FALSE; 696 } 697 698 if (need_temp3) { 699 temp3 = get_temp( emit ); 700 701 if (!emit_repl( emit, temp3, &src3 )) 702 return FALSE; 703 } 704 705 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 )) 706 return FALSE; 707 708 if (need_temp3) 709 release_temp( emit, temp3 ); 710 if (need_temp0) 711 release_temp( emit, temp0 ); 712 return TRUE; 713 } 714 715 716 /** 717 * Do the src and dest registers refer to the same register? 718 */ 719 static boolean 720 alias_src_dst(struct src_register src, 721 SVGA3dShaderDestToken dst) 722 { 723 if (src.base.num != dst.num) 724 return FALSE; 725 726 if (SVGA3dShaderGetRegType(dst.value) != 727 SVGA3dShaderGetRegType(src.base.value)) 728 return FALSE; 729 730 return TRUE; 731 } 732 733 734 /** 735 * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I] 736 * instructions. 
737 */ 738 static boolean 739 emit_def_const(struct svga_shader_emitter *emit, 740 SVGA3dShaderConstType type, 741 unsigned idx, float a, float b, float c, float d) 742 { 743 SVGA3DOpDefArgs def; 744 SVGA3dShaderInstToken opcode; 745 746 switch (type) { 747 case SVGA3D_CONST_TYPE_FLOAT: 748 opcode = inst_token( SVGA3DOP_DEF ); 749 def.dst = dst_register( SVGA3DREG_CONST, idx ); 750 def.constValues[0] = a; 751 def.constValues[1] = b; 752 def.constValues[2] = c; 753 def.constValues[3] = d; 754 break; 755 case SVGA3D_CONST_TYPE_INT: 756 opcode = inst_token( SVGA3DOP_DEFI ); 757 def.dst = dst_register( SVGA3DREG_CONSTINT, idx ); 758 def.constIValues[0] = (int)a; 759 def.constIValues[1] = (int)b; 760 def.constIValues[2] = (int)c; 761 def.constIValues[3] = (int)d; 762 break; 763 default: 764 assert(0); 765 opcode = inst_token( SVGA3DOP_NOP ); 766 break; 767 } 768 769 if (!emit_instruction(emit, opcode) || 770 !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values))) 771 return FALSE; 772 773 return TRUE; 774 } 775 776 777 static boolean 778 create_loop_const( struct svga_shader_emitter *emit ) 779 { 780 unsigned idx = emit->nr_hw_int_const++; 781 782 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx, 783 255, /* iteration count */ 784 0, /* initial value */ 785 1, /* step size */ 786 0 /* not used, must be 0 */)) 787 return FALSE; 788 789 emit->loop_const_idx = idx; 790 emit->created_loop_const = TRUE; 791 792 return TRUE; 793 } 794 795 static boolean 796 create_arl_consts( struct svga_shader_emitter *emit ) 797 { 798 int i; 799 800 for (i = 0; i < emit->num_arl_consts; i += 4) { 801 int j; 802 unsigned idx = emit->nr_hw_float_const++; 803 float vals[4]; 804 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) { 805 vals[j] = (float) emit->arl_consts[i + j].number; 806 emit->arl_consts[i + j].idx = idx; 807 switch (j) { 808 case 0: 809 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X; 810 break; 811 case 1: 812 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y; 813 break; 814 case 2: 815 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z; 816 break; 817 case 3: 818 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W; 819 break; 820 } 821 } 822 while (j < 4) 823 vals[j++] = 0; 824 825 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 826 vals[0], vals[1], 827 vals[2], vals[3])) 828 return FALSE; 829 } 830 831 return TRUE; 832 } 833 834 835 /** 836 * Return the register which holds the pixel shaders front/back- 837 * facing value. 838 */ 839 static struct src_register 840 get_vface( struct svga_shader_emitter *emit ) 841 { 842 assert(emit->emitted_vface); 843 return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE); 844 } 845 846 847 /** 848 * Create/emit a "common" constant with values {0, 0.5, -1, 1}. 849 * We can swizzle this to produce other useful constants such as 850 * {0, 0, 0, 0}, {1, 1, 1, 1}, etc. 851 */ 852 static boolean 853 create_common_immediate( struct svga_shader_emitter *emit ) 854 { 855 unsigned idx = emit->nr_hw_float_const++; 856 857 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate 858 * other useful vectors. 
859 */ 860 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 861 idx, 0.0f, 0.5f, -1.0f, 1.0f )) 862 return FALSE; 863 emit->common_immediate_idx[0] = idx; 864 idx++; 865 866 /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */ 867 if (emit->key.vs.adjust_attrib_range) { 868 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 869 idx, 2.0f, 0.0f, 0.0f, 0.0f )) 870 return FALSE; 871 emit->common_immediate_idx[1] = idx; 872 } 873 else { 874 emit->common_immediate_idx[1] = -1; 875 } 876 877 emit->created_common_immediate = TRUE; 878 879 return TRUE; 880 } 881 882 883 /** 884 * Return swizzle/position for the given value in the "common" immediate. 885 */ 886 static inline unsigned 887 common_immediate_swizzle(float value) 888 { 889 if (value == 0.0f) 890 return TGSI_SWIZZLE_X; 891 else if (value == 0.5f) 892 return TGSI_SWIZZLE_Y; 893 else if (value == -1.0f) 894 return TGSI_SWIZZLE_Z; 895 else if (value == 1.0f) 896 return TGSI_SWIZZLE_W; 897 else { 898 assert(!"illegal value in common_immediate_swizzle"); 899 return TGSI_SWIZZLE_X; 900 } 901 } 902 903 904 /** 905 * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5 906 */ 907 static struct src_register 908 get_immediate(struct svga_shader_emitter *emit, 909 float x, float y, float z, float w) 910 { 911 unsigned sx = common_immediate_swizzle(x); 912 unsigned sy = common_immediate_swizzle(y); 913 unsigned sz = common_immediate_swizzle(z); 914 unsigned sw = common_immediate_swizzle(w); 915 assert(emit->created_common_immediate); 916 assert(emit->common_immediate_idx[0] >= 0); 917 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 918 sx, sy, sz, sw); 919 } 920 921 922 /** 923 * returns {0, 0, 0, 0} immediate 924 */ 925 static struct src_register 926 get_zero_immediate( struct svga_shader_emitter *emit ) 927 { 928 assert(emit->created_common_immediate); 929 assert(emit->common_immediate_idx[0] >= 0); 930 return swizzle(src_register( SVGA3DREG_CONST, 931 emit->common_immediate_idx[0]), 932 0, 0, 0, 0); 933 } 934 935 936 /** 937 * returns {1, 1, 1, 1} immediate 938 */ 939 static struct src_register 940 get_one_immediate( struct svga_shader_emitter *emit ) 941 { 942 assert(emit->created_common_immediate); 943 assert(emit->common_immediate_idx[0] >= 0); 944 return swizzle(src_register( SVGA3DREG_CONST, 945 emit->common_immediate_idx[0]), 946 3, 3, 3, 3); 947 } 948 949 950 /** 951 * returns {0.5, 0.5, 0.5, 0.5} immediate 952 */ 953 static struct src_register 954 get_half_immediate( struct svga_shader_emitter *emit ) 955 { 956 assert(emit->created_common_immediate); 957 assert(emit->common_immediate_idx[0] >= 0); 958 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 959 1, 1, 1, 1); 960 } 961 962 963 /** 964 * returns {2, 2, 2, 2} immediate 965 */ 966 static struct src_register 967 get_two_immediate( struct svga_shader_emitter *emit ) 968 { 969 /* Note we use the second common immediate here */ 970 assert(emit->created_common_immediate); 971 assert(emit->common_immediate_idx[1] >= 0); 972 return swizzle(src_register( SVGA3DREG_CONST, 973 emit->common_immediate_idx[1]), 974 0, 0, 0, 0); 975 } 976 977 978 /** 979 * returns the loop const 980 */ 981 static struct src_register 982 get_loop_const( struct svga_shader_emitter *emit ) 983 { 984 assert(emit->created_loop_const); 985 assert(emit->loop_const_idx >= 0); 986 return src_register( SVGA3DREG_CONSTINT, 987 emit->loop_const_idx ); 988 } 989 990 991 static struct src_register 992 get_fake_arl_const( struct 
svga_shader_emitter *emit ) 993 { 994 struct src_register reg; 995 int idx = 0, swizzle = 0, i; 996 997 for (i = 0; i < emit->num_arl_consts; ++ i) { 998 if (emit->arl_consts[i].arl_num == emit->current_arl) { 999 idx = emit->arl_consts[i].idx; 1000 swizzle = emit->arl_consts[i].swizzle; 1001 } 1002 } 1003 1004 reg = src_register( SVGA3DREG_CONST, idx ); 1005 return scalar(reg, swizzle); 1006 } 1007 1008 1009 /** 1010 * Return a register which holds the width and height of the texture 1011 * currently bound to the given sampler. 1012 */ 1013 static struct src_register 1014 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) 1015 { 1016 int idx; 1017 struct src_register reg; 1018 1019 /* the width/height indexes start right after constants */ 1020 idx = emit->key.tex[sampler_num].width_height_idx + 1021 emit->info.file_max[TGSI_FILE_CONSTANT] + 1; 1022 1023 reg = src_register( SVGA3DREG_CONST, idx ); 1024 return reg; 1025 } 1026 1027 1028 static boolean 1029 emit_fake_arl(struct svga_shader_emitter *emit, 1030 const struct tgsi_full_instruction *insn) 1031 { 1032 const struct src_register src0 = 1033 translate_src_register(emit, &insn->Src[0] ); 1034 struct src_register src1 = get_fake_arl_const( emit ); 1035 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1036 SVGA3dShaderDestToken tmp = get_temp( emit ); 1037 1038 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) 1039 return FALSE; 1040 1041 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ), 1042 src1)) 1043 return FALSE; 1044 1045 /* replicate the original swizzle */ 1046 src1 = src(tmp); 1047 src1.base.swizzle = src0.base.swizzle; 1048 1049 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ), 1050 dst, src1 ); 1051 } 1052 1053 1054 static boolean 1055 emit_if(struct svga_shader_emitter *emit, 1056 const struct tgsi_full_instruction *insn) 1057 { 1058 struct src_register src0 = 1059 translate_src_register(emit, &insn->Src[0]); 1060 struct src_register zero = get_zero_immediate(emit); 1061 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); 1062 1063 if_token.control = SVGA3DOPCOMPC_NE; 1064 1065 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) { 1066 /* 1067 * Max different constant registers readable per IFC instruction is 1. 1068 */ 1069 SVGA3dShaderDestToken tmp = get_temp( emit ); 1070 1071 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) 1072 return FALSE; 1073 1074 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X); 1075 } 1076 1077 emit->dynamic_branching_level++; 1078 1079 return (emit_instruction( emit, if_token ) && 1080 emit_src( emit, src0 ) && 1081 emit_src( emit, zero ) ); 1082 } 1083 1084 1085 static boolean 1086 emit_else(struct svga_shader_emitter *emit, 1087 const struct tgsi_full_instruction *insn) 1088 { 1089 return emit_instruction(emit, inst_token(SVGA3DOP_ELSE)); 1090 } 1091 1092 1093 static boolean 1094 emit_endif(struct svga_shader_emitter *emit, 1095 const struct tgsi_full_instruction *insn) 1096 { 1097 emit->dynamic_branching_level--; 1098 1099 return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF)); 1100 } 1101 1102 1103 /** 1104 * Translate the following TGSI FLR instruction. 1105 * FLR DST, SRC 1106 * To the following SVGA3D instruction sequence. 
1107 * FRC TMP, SRC 1108 * SUB DST, SRC, TMP 1109 */ 1110 static boolean 1111 emit_floor(struct svga_shader_emitter *emit, 1112 const struct tgsi_full_instruction *insn ) 1113 { 1114 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1115 const struct src_register src0 = 1116 translate_src_register(emit, &insn->Src[0] ); 1117 SVGA3dShaderDestToken temp = get_temp( emit ); 1118 1119 /* FRC TMP, SRC */ 1120 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 )) 1121 return FALSE; 1122 1123 /* SUB DST, SRC, TMP */ 1124 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0, 1125 negate( src( temp ) ) )) 1126 return FALSE; 1127 1128 return TRUE; 1129 } 1130 1131 1132 /** 1133 * Translate the following TGSI CEIL instruction. 1134 * CEIL DST, SRC 1135 * To the following SVGA3D instruction sequence. 1136 * FRC TMP, -SRC 1137 * ADD DST, SRC, TMP 1138 */ 1139 static boolean 1140 emit_ceil(struct svga_shader_emitter *emit, 1141 const struct tgsi_full_instruction *insn) 1142 { 1143 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); 1144 const struct src_register src0 = 1145 translate_src_register(emit, &insn->Src[0]); 1146 SVGA3dShaderDestToken temp = get_temp(emit); 1147 1148 /* FRC TMP, -SRC */ 1149 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0))) 1150 return FALSE; 1151 1152 /* ADD DST, SRC, TMP */ 1153 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp))) 1154 return FALSE; 1155 1156 return TRUE; 1157 } 1158 1159 1160 /** 1161 * Translate the following TGSI DIV instruction. 1162 * DIV DST.xy, SRC0, SRC1 1163 * To the following SVGA3D instruction sequence. 1164 * RCP TMP.x, SRC1.xxxx 1165 * RCP TMP.y, SRC1.yyyy 1166 * MUL DST.xy, SRC0, TMP 1167 */ 1168 static boolean 1169 emit_div(struct svga_shader_emitter *emit, 1170 const struct tgsi_full_instruction *insn ) 1171 { 1172 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1173 const struct src_register src0 = 1174 translate_src_register(emit, &insn->Src[0] ); 1175 const struct src_register src1 = 1176 translate_src_register(emit, &insn->Src[1] ); 1177 SVGA3dShaderDestToken temp = get_temp( emit ); 1178 unsigned i; 1179 1180 /* For each enabled element, perform a RCP instruction. Note that 1181 * RCP is scalar in SVGA3D: 1182 */ 1183 for (i = 0; i < 4; i++) { 1184 unsigned channel = 1 << i; 1185 if (dst.mask & channel) { 1186 /* RCP TMP.?, SRC1.???? */ 1187 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), 1188 writemask(temp, channel), 1189 scalar(src1, i) )) 1190 return FALSE; 1191 } 1192 } 1193 1194 /* Vector mul: 1195 * MUL DST, SRC0, TMP 1196 */ 1197 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0, 1198 src( temp ) )) 1199 return FALSE; 1200 1201 return TRUE; 1202 } 1203 1204 1205 /** 1206 * Translate the following TGSI DP2 instruction. 1207 * DP2 DST, SRC1, SRC2 1208 * To the following SVGA3D instruction sequence. 
1209 * MUL TMP, SRC1, SRC2 1210 * ADD DST, TMP.xxxx, TMP.yyyy 1211 */ 1212 static boolean 1213 emit_dp2(struct svga_shader_emitter *emit, 1214 const struct tgsi_full_instruction *insn ) 1215 { 1216 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1217 const struct src_register src0 = 1218 translate_src_register(emit, &insn->Src[0]); 1219 const struct src_register src1 = 1220 translate_src_register(emit, &insn->Src[1]); 1221 SVGA3dShaderDestToken temp = get_temp( emit ); 1222 struct src_register temp_src0, temp_src1; 1223 1224 /* MUL TMP, SRC1, SRC2 */ 1225 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 )) 1226 return FALSE; 1227 1228 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X); 1229 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y); 1230 1231 /* ADD DST, TMP.xxxx, TMP.yyyy */ 1232 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, 1233 temp_src0, temp_src1 )) 1234 return FALSE; 1235 1236 return TRUE; 1237 } 1238 1239 1240 /** 1241 * Translate the following TGSI DPH instruction. 1242 * DPH DST, SRC1, SRC2 1243 * To the following SVGA3D instruction sequence. 1244 * DP3 TMP, SRC1, SRC2 1245 * ADD DST, TMP, SRC2.wwww 1246 */ 1247 static boolean 1248 emit_dph(struct svga_shader_emitter *emit, 1249 const struct tgsi_full_instruction *insn ) 1250 { 1251 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1252 const struct src_register src0 = translate_src_register( 1253 emit, &insn->Src[0] ); 1254 struct src_register src1 = 1255 translate_src_register(emit, &insn->Src[1]); 1256 SVGA3dShaderDestToken temp = get_temp( emit ); 1257 1258 /* DP3 TMP, SRC1, SRC2 */ 1259 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 )) 1260 return FALSE; 1261 1262 src1 = scalar(src1, TGSI_SWIZZLE_W); 1263 1264 /* ADD DST, TMP, SRC2.wwww */ 1265 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, 1266 src( temp ), src1 )) 1267 return FALSE; 1268 1269 return TRUE; 1270 } 1271 1272 1273 /** 1274 * Sine / Cosine helper function. 1275 */ 1276 static boolean 1277 do_emit_sincos(struct svga_shader_emitter *emit, 1278 SVGA3dShaderDestToken dst, 1279 struct src_register src0) 1280 { 1281 src0 = scalar(src0, TGSI_SWIZZLE_X); 1282 return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0); 1283 } 1284 1285 1286 /** 1287 * Translate/emit a TGSI SIN, COS or CSC instruction. 
1288 */ 1289 static boolean 1290 emit_sincos(struct svga_shader_emitter *emit, 1291 const struct tgsi_full_instruction *insn) 1292 { 1293 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1294 struct src_register src0 = translate_src_register(emit, &insn->Src[0]); 1295 SVGA3dShaderDestToken temp = get_temp( emit ); 1296 1297 /* SCS TMP SRC */ 1298 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 )) 1299 return FALSE; 1300 1301 /* MOV DST TMP */ 1302 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) )) 1303 return FALSE; 1304 1305 return TRUE; 1306 } 1307 1308 1309 /** 1310 * Translate TGSI SIN instruction into: 1311 * SCS TMP SRC 1312 * MOV DST TMP.yyyy 1313 */ 1314 static boolean 1315 emit_sin(struct svga_shader_emitter *emit, 1316 const struct tgsi_full_instruction *insn ) 1317 { 1318 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1319 struct src_register src0 = 1320 translate_src_register(emit, &insn->Src[0] ); 1321 SVGA3dShaderDestToken temp = get_temp( emit ); 1322 1323 /* SCS TMP SRC */ 1324 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0)) 1325 return FALSE; 1326 1327 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y); 1328 1329 /* MOV DST TMP.yyyy */ 1330 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) 1331 return FALSE; 1332 1333 return TRUE; 1334 } 1335 1336 1337 /* 1338 * Translate TGSI COS instruction into: 1339 * SCS TMP SRC 1340 * MOV DST TMP.xxxx 1341 */ 1342 static boolean 1343 emit_cos(struct svga_shader_emitter *emit, 1344 const struct tgsi_full_instruction *insn) 1345 { 1346 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1347 struct src_register src0 = 1348 translate_src_register(emit, &insn->Src[0] ); 1349 SVGA3dShaderDestToken temp = get_temp( emit ); 1350 1351 /* SCS TMP SRC */ 1352 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 )) 1353 return FALSE; 1354 1355 src0 = scalar(src( temp ), TGSI_SWIZZLE_X); 1356 1357 /* MOV DST TMP.xxxx */ 1358 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) 1359 return FALSE; 1360 1361 return TRUE; 1362 } 1363 1364 1365 /** 1366 * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction. 
1367 */ 1368 static boolean 1369 emit_ssg(struct svga_shader_emitter *emit, 1370 const struct tgsi_full_instruction *insn) 1371 { 1372 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1373 struct src_register src0 = 1374 translate_src_register(emit, &insn->Src[0] ); 1375 SVGA3dShaderDestToken temp0 = get_temp( emit ); 1376 SVGA3dShaderDestToken temp1 = get_temp( emit ); 1377 struct src_register zero, one; 1378 1379 if (emit->unit == PIPE_SHADER_VERTEX) { 1380 /* SGN DST, SRC0, TMP0, TMP1 */ 1381 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0, 1382 src( temp0 ), src( temp1 ) ); 1383 } 1384 1385 one = get_one_immediate(emit); 1386 zero = get_zero_immediate(emit); 1387 1388 /* CMP TMP0, SRC0, one, zero */ 1389 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), 1390 writemask( temp0, dst.mask ), src0, one, zero )) 1391 return FALSE; 1392 1393 /* CMP TMP1, negate(SRC0), negate(one), zero */ 1394 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), 1395 writemask( temp1, dst.mask ), negate( src0 ), negate( one ), 1396 zero )) 1397 return FALSE; 1398 1399 /* ADD DST, TMP0, TMP1 */ 1400 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ), 1401 src( temp1 ) ); 1402 } 1403 1404 1405 /** 1406 * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative). 1407 */ 1408 static boolean 1409 emit_kill_if(struct svga_shader_emitter *emit, 1410 const struct tgsi_full_instruction *insn) 1411 { 1412 const struct tgsi_full_src_register *reg = &insn->Src[0]; 1413 struct src_register src0, srcIn; 1414 const boolean special = (reg->Register.Absolute || 1415 reg->Register.Negate || 1416 reg->Register.Indirect || 1417 reg->Register.SwizzleX != 0 || 1418 reg->Register.SwizzleY != 1 || 1419 reg->Register.SwizzleZ != 2 || 1420 reg->Register.File != TGSI_FILE_TEMPORARY); 1421 SVGA3dShaderDestToken temp; 1422 1423 src0 = srcIn = translate_src_register( emit, reg ); 1424 1425 if (special) { 1426 /* need a temp reg */ 1427 temp = get_temp( emit ); 1428 } 1429 1430 if (special) { 1431 /* move the source into a temp register */ 1432 submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, src0); 1433 1434 src0 = src( temp ); 1435 } 1436 1437 /* Do the texkill by checking if any of the XYZW components are < 0. 1438 * Note that ps_2_0 and later take XYZW in consideration, while ps_1_x 1439 * only used XYZ. The MSDN documentation about this is incorrect. 1440 */ 1441 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) )) 1442 return FALSE; 1443 1444 return TRUE; 1445 } 1446 1447 1448 /** 1449 * Translate/emit unconditional kill instruction (usually found inside 1450 * an IF/ELSE/ENDIF block). 1451 */ 1452 static boolean 1453 emit_kill(struct svga_shader_emitter *emit, 1454 const struct tgsi_full_instruction *insn) 1455 { 1456 SVGA3dShaderDestToken temp; 1457 struct src_register one = get_one_immediate(emit); 1458 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL ); 1459 1460 /* texkill doesn't allow negation on the operand so lets move 1461 * negation of {1} to a temp register */ 1462 temp = get_temp( emit ); 1463 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, 1464 negate( one ) )) 1465 return FALSE; 1466 1467 return submit_op0( emit, inst, temp ); 1468 } 1469 1470 1471 /** 1472 * Test if r1 and r2 are the same register. 
1473 */ 1474 static boolean 1475 same_register(struct src_register r1, struct src_register r2) 1476 { 1477 return (r1.base.num == r2.base.num && 1478 r1.base.type_upper == r2.base.type_upper && 1479 r1.base.type_lower == r2.base.type_lower); 1480 } 1481 1482 1483 1484 /** 1485 * Implement conditionals by initializing destination reg to 'fail', 1486 * then set predicate reg with UFOP_SETP, then move 'pass' to dest 1487 * based on predicate reg. 1488 * 1489 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems. 1490 * MOV dst, fail 1491 * MOV dst, pass, p0 1492 */ 1493 static boolean 1494 emit_conditional(struct svga_shader_emitter *emit, 1495 unsigned compare_func, 1496 SVGA3dShaderDestToken dst, 1497 struct src_register src0, 1498 struct src_register src1, 1499 struct src_register pass, 1500 struct src_register fail) 1501 { 1502 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); 1503 SVGA3dShaderInstToken setp_token; 1504 1505 switch (compare_func) { 1506 case PIPE_FUNC_NEVER: 1507 return submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1508 dst, fail ); 1509 break; 1510 case PIPE_FUNC_LESS: 1511 setp_token = inst_token_setp(SVGA3DOPCOMP_LT); 1512 break; 1513 case PIPE_FUNC_EQUAL: 1514 setp_token = inst_token_setp(SVGA3DOPCOMP_EQ); 1515 break; 1516 case PIPE_FUNC_LEQUAL: 1517 setp_token = inst_token_setp(SVGA3DOPCOMP_LE); 1518 break; 1519 case PIPE_FUNC_GREATER: 1520 setp_token = inst_token_setp(SVGA3DOPCOMP_GT); 1521 break; 1522 case PIPE_FUNC_NOTEQUAL: 1523 setp_token = inst_token_setp(SVGA3DOPCOMPC_NE); 1524 break; 1525 case PIPE_FUNC_GEQUAL: 1526 setp_token = inst_token_setp(SVGA3DOPCOMP_GE); 1527 break; 1528 case PIPE_FUNC_ALWAYS: 1529 return submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1530 dst, pass ); 1531 break; 1532 } 1533 1534 if (same_register(src(dst), pass)) { 1535 /* We'll get bad results if the dst and pass registers are the same 1536 * so use a temp register containing pass. 1537 */ 1538 SVGA3dShaderDestToken temp = get_temp(emit); 1539 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass)) 1540 return FALSE; 1541 pass = src(temp); 1542 } 1543 1544 /* SETP src0, COMPOP, src1 */ 1545 if (!submit_op2( emit, setp_token, pred_reg, 1546 src0, src1 )) 1547 return FALSE; 1548 1549 /* MOV dst, fail */ 1550 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail)) 1551 return FALSE; 1552 1553 /* MOV dst, pass (predicated) 1554 * 1555 * Note that the predicate reg (and possible modifiers) is passed 1556 * as the first source argument. 1557 */ 1558 if (!submit_op2(emit, 1559 inst_token_predicated(SVGA3DOP_MOV), dst, 1560 src(pred_reg), pass)) 1561 return FALSE; 1562 1563 return TRUE; 1564 } 1565 1566 1567 /** 1568 * Helper for emiting 'selection' commands. Basically: 1569 * if (src0 OP src1) 1570 * dst = 1.0; 1571 * else 1572 * dst = 0.0; 1573 */ 1574 static boolean 1575 emit_select(struct svga_shader_emitter *emit, 1576 unsigned compare_func, 1577 SVGA3dShaderDestToken dst, 1578 struct src_register src0, 1579 struct src_register src1 ) 1580 { 1581 /* There are some SVGA instructions which implement some selects 1582 * directly, but they are only available in the vertex shader. 
1583 */ 1584 if (emit->unit == PIPE_SHADER_VERTEX) { 1585 switch (compare_func) { 1586 case PIPE_FUNC_GEQUAL: 1587 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 ); 1588 case PIPE_FUNC_LEQUAL: 1589 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 ); 1590 case PIPE_FUNC_GREATER: 1591 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 ); 1592 case PIPE_FUNC_LESS: 1593 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 ); 1594 default: 1595 break; 1596 } 1597 } 1598 1599 /* Otherwise, need to use the setp approach: 1600 */ 1601 { 1602 struct src_register one, zero; 1603 /* zero immediate is 0,0,0,1 */ 1604 zero = get_zero_immediate(emit); 1605 one = get_one_immediate(emit); 1606 1607 return emit_conditional(emit, compare_func, dst, src0, src1, one, zero); 1608 } 1609 } 1610 1611 1612 /** 1613 * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction. 1614 */ 1615 static boolean 1616 emit_select_op(struct svga_shader_emitter *emit, 1617 unsigned compare, 1618 const struct tgsi_full_instruction *insn) 1619 { 1620 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1621 struct src_register src0 = translate_src_register( 1622 emit, &insn->Src[0] ); 1623 struct src_register src1 = translate_src_register( 1624 emit, &insn->Src[1] ); 1625 1626 return emit_select( emit, compare, dst, src0, src1 ); 1627 } 1628 1629 1630 /** 1631 * Translate TGSI CMP instruction. Component-wise: 1632 * dst = (src0 < 0.0) ? src1 : src2 1633 */ 1634 static boolean 1635 emit_cmp(struct svga_shader_emitter *emit, 1636 const struct tgsi_full_instruction *insn) 1637 { 1638 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1639 const struct src_register src0 = 1640 translate_src_register(emit, &insn->Src[0] ); 1641 const struct src_register src1 = 1642 translate_src_register(emit, &insn->Src[1] ); 1643 const struct src_register src2 = 1644 translate_src_register(emit, &insn->Src[2] ); 1645 1646 if (emit->unit == PIPE_SHADER_VERTEX) { 1647 struct src_register zero = get_zero_immediate(emit); 1648 /* We used to simulate CMP with SLT+LRP. But that didn't work when 1649 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed 1650 * because it involves a CMP to handle the 0 case. 1651 * Use a conditional expression instead. 1652 */ 1653 return emit_conditional(emit, PIPE_FUNC_LESS, dst, 1654 src0, zero, src1, src2); 1655 } 1656 else { 1657 assert(emit->unit == PIPE_SHADER_FRAGMENT); 1658 1659 /* CMP DST, SRC0, SRC2, SRC1 */ 1660 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, 1661 src0, src2, src1); 1662 } 1663 } 1664 1665 1666 /** 1667 * Translate/emit 2-operand (coord, sampler) texture instructions. 
1668 */ 1669 static boolean 1670 emit_tex2(struct svga_shader_emitter *emit, 1671 const struct tgsi_full_instruction *insn, 1672 SVGA3dShaderDestToken dst) 1673 { 1674 SVGA3dShaderInstToken inst; 1675 struct src_register texcoord; 1676 struct src_register sampler; 1677 SVGA3dShaderDestToken tmp; 1678 1679 inst.value = 0; 1680 1681 switch (insn->Instruction.Opcode) { 1682 case TGSI_OPCODE_TEX: 1683 inst.op = SVGA3DOP_TEX; 1684 break; 1685 case TGSI_OPCODE_TXP: 1686 inst.op = SVGA3DOP_TEX; 1687 inst.control = SVGA3DOPCONT_PROJECT; 1688 break; 1689 case TGSI_OPCODE_TXB: 1690 inst.op = SVGA3DOP_TEX; 1691 inst.control = SVGA3DOPCONT_BIAS; 1692 break; 1693 case TGSI_OPCODE_TXL: 1694 inst.op = SVGA3DOP_TEXLDL; 1695 break; 1696 default: 1697 assert(0); 1698 return FALSE; 1699 } 1700 1701 texcoord = translate_src_register( emit, &insn->Src[0] ); 1702 sampler = translate_src_register( emit, &insn->Src[1] ); 1703 1704 if (emit->key.tex[sampler.base.num].unnormalized || 1705 emit->dynamic_branching_level > 0) 1706 tmp = get_temp( emit ); 1707 1708 /* Can't do mipmapping inside dynamic branch constructs. Force LOD 1709 * zero in that case. 1710 */ 1711 if (emit->dynamic_branching_level > 0 && 1712 inst.op == SVGA3DOP_TEX && 1713 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) { 1714 struct src_register zero = get_zero_immediate(emit); 1715 1716 /* MOV tmp, texcoord */ 1717 if (!submit_op1( emit, 1718 inst_token( SVGA3DOP_MOV ), 1719 tmp, 1720 texcoord )) 1721 return FALSE; 1722 1723 /* MOV tmp.w, zero */ 1724 if (!submit_op1( emit, 1725 inst_token( SVGA3DOP_MOV ), 1726 writemask( tmp, TGSI_WRITEMASK_W ), 1727 zero )) 1728 return FALSE; 1729 1730 texcoord = src( tmp ); 1731 inst.op = SVGA3DOP_TEXLDL; 1732 } 1733 1734 /* Explicit normalization of texcoords: 1735 */ 1736 if (emit->key.tex[sampler.base.num].unnormalized) { 1737 struct src_register wh = get_tex_dimensions( emit, sampler.base.num ); 1738 1739 /* MUL tmp, SRC0, WH */ 1740 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 1741 tmp, texcoord, wh )) 1742 return FALSE; 1743 1744 texcoord = src( tmp ); 1745 } 1746 1747 return submit_op2( emit, inst, dst, texcoord, sampler ); 1748 } 1749 1750 1751 /** 1752 * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions. 1753 */ 1754 static boolean 1755 emit_tex4(struct svga_shader_emitter *emit, 1756 const struct tgsi_full_instruction *insn, 1757 SVGA3dShaderDestToken dst ) 1758 { 1759 SVGA3dShaderInstToken inst; 1760 struct src_register texcoord; 1761 struct src_register ddx; 1762 struct src_register ddy; 1763 struct src_register sampler; 1764 1765 texcoord = translate_src_register( emit, &insn->Src[0] ); 1766 ddx = translate_src_register( emit, &insn->Src[1] ); 1767 ddy = translate_src_register( emit, &insn->Src[2] ); 1768 sampler = translate_src_register( emit, &insn->Src[3] ); 1769 1770 inst.value = 0; 1771 1772 switch (insn->Instruction.Opcode) { 1773 case TGSI_OPCODE_TXD: 1774 inst.op = SVGA3DOP_TEXLDD; /* 4 args! */ 1775 break; 1776 default: 1777 assert(0); 1778 return FALSE; 1779 } 1780 1781 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy ); 1782 } 1783 1784 1785 /** 1786 * Emit texture swizzle code. We do this here since SVGA samplers don't 1787 * directly support swizzles. 
1788 */ 1789 static boolean 1790 emit_tex_swizzle(struct svga_shader_emitter *emit, 1791 SVGA3dShaderDestToken dst, 1792 struct src_register src, 1793 unsigned swizzle_x, 1794 unsigned swizzle_y, 1795 unsigned swizzle_z, 1796 unsigned swizzle_w) 1797 { 1798 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; 1799 unsigned srcSwizzle[4]; 1800 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0; 1801 unsigned i; 1802 1803 /* build writemasks and srcSwizzle terms */ 1804 for (i = 0; i < 4; i++) { 1805 if (swizzleIn[i] == PIPE_SWIZZLE_0) { 1806 srcSwizzle[i] = TGSI_SWIZZLE_X + i; 1807 zeroWritemask |= (1 << i); 1808 } 1809 else if (swizzleIn[i] == PIPE_SWIZZLE_1) { 1810 srcSwizzle[i] = TGSI_SWIZZLE_X + i; 1811 oneWritemask |= (1 << i); 1812 } 1813 else { 1814 srcSwizzle[i] = swizzleIn[i]; 1815 srcWritemask |= (1 << i); 1816 } 1817 } 1818 1819 /* write x/y/z/w comps */ 1820 if (dst.mask & srcWritemask) { 1821 if (!submit_op1(emit, 1822 inst_token(SVGA3DOP_MOV), 1823 writemask(dst, srcWritemask), 1824 swizzle(src, 1825 srcSwizzle[0], 1826 srcSwizzle[1], 1827 srcSwizzle[2], 1828 srcSwizzle[3]))) 1829 return FALSE; 1830 } 1831 1832 /* write 0 comps */ 1833 if (dst.mask & zeroWritemask) { 1834 if (!submit_op1(emit, 1835 inst_token(SVGA3DOP_MOV), 1836 writemask(dst, zeroWritemask), 1837 get_zero_immediate(emit))) 1838 return FALSE; 1839 } 1840 1841 /* write 1 comps */ 1842 if (dst.mask & oneWritemask) { 1843 if (!submit_op1(emit, 1844 inst_token(SVGA3DOP_MOV), 1845 writemask(dst, oneWritemask), 1846 get_one_immediate(emit))) 1847 return FALSE; 1848 } 1849 1850 return TRUE; 1851 } 1852 1853 1854 /** 1855 * Translate/emit a TGSI texture sample instruction. 1856 */ 1857 static boolean 1858 emit_tex(struct svga_shader_emitter *emit, 1859 const struct tgsi_full_instruction *insn) 1860 { 1861 SVGA3dShaderDestToken dst = 1862 translate_dst_register( emit, insn, 0 ); 1863 struct src_register src0 = 1864 translate_src_register( emit, &insn->Src[0] ); 1865 struct src_register src1 = 1866 translate_src_register( emit, &insn->Src[1] ); 1867 1868 SVGA3dShaderDestToken tex_result; 1869 const unsigned unit = src1.base.num; 1870 1871 /* check for shadow samplers */ 1872 boolean compare = (emit->key.tex[unit].compare_mode == 1873 PIPE_TEX_COMPARE_R_TO_TEXTURE); 1874 1875 /* texture swizzle */ 1876 boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X || 1877 emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y || 1878 emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z || 1879 emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W); 1880 1881 boolean saturate = insn->Instruction.Saturate; 1882 1883 /* If doing compare processing or tex swizzle or saturation, we need to put 1884 * the fetched color into a temporary so it can be used as a source later on. 
1885 */ 1886 if (compare || swizzle || saturate) { 1887 tex_result = get_temp( emit ); 1888 } 1889 else { 1890 tex_result = dst; 1891 } 1892 1893 switch(insn->Instruction.Opcode) { 1894 case TGSI_OPCODE_TEX: 1895 case TGSI_OPCODE_TXB: 1896 case TGSI_OPCODE_TXP: 1897 case TGSI_OPCODE_TXL: 1898 if (!emit_tex2( emit, insn, tex_result )) 1899 return FALSE; 1900 break; 1901 case TGSI_OPCODE_TXD: 1902 if (!emit_tex4( emit, insn, tex_result )) 1903 return FALSE; 1904 break; 1905 default: 1906 assert(0); 1907 } 1908 1909 if (compare) { 1910 SVGA3dShaderDestToken dst2; 1911 1912 if (swizzle || saturate) 1913 dst2 = tex_result; 1914 else 1915 dst2 = dst; 1916 1917 if (dst.mask & TGSI_WRITEMASK_XYZ) { 1918 SVGA3dShaderDestToken src0_zdivw = get_temp( emit ); 1919 /* When sampling a depth texture, the result of the comparison is in 1920 * the Y component. 1921 */ 1922 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y); 1923 struct src_register r_coord; 1924 1925 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) { 1926 /* Divide texcoord R by Q */ 1927 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), 1928 writemask(src0_zdivw, TGSI_WRITEMASK_X), 1929 scalar(src0, TGSI_SWIZZLE_W) )) 1930 return FALSE; 1931 1932 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 1933 writemask(src0_zdivw, TGSI_WRITEMASK_X), 1934 scalar(src0, TGSI_SWIZZLE_Z), 1935 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) )) 1936 return FALSE; 1937 1938 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X); 1939 } 1940 else { 1941 r_coord = scalar(src0, TGSI_SWIZZLE_Z); 1942 } 1943 1944 /* Compare texture sample value against R component of texcoord */ 1945 if (!emit_select(emit, 1946 emit->key.tex[unit].compare_func, 1947 writemask( dst2, TGSI_WRITEMASK_XYZ ), 1948 r_coord, 1949 tex_src_x)) 1950 return FALSE; 1951 } 1952 1953 if (dst.mask & TGSI_WRITEMASK_W) { 1954 struct src_register one = get_one_immediate(emit); 1955 1956 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1957 writemask( dst2, TGSI_WRITEMASK_W ), 1958 one )) 1959 return FALSE; 1960 } 1961 } 1962 1963 if (saturate && !swizzle) { 1964 /* MOV_SAT real_dst, dst */ 1965 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) )) 1966 return FALSE; 1967 } 1968 else if (swizzle) { 1969 /* swizzle from tex_result to dst (handles saturation too, if any) */ 1970 emit_tex_swizzle(emit, 1971 dst, src(tex_result), 1972 emit->key.tex[unit].swizzle_r, 1973 emit->key.tex[unit].swizzle_g, 1974 emit->key.tex[unit].swizzle_b, 1975 emit->key.tex[unit].swizzle_a); 1976 } 1977 1978 return TRUE; 1979 } 1980 1981 1982 static boolean 1983 emit_bgnloop(struct svga_shader_emitter *emit, 1984 const struct tgsi_full_instruction *insn) 1985 { 1986 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP ); 1987 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 ); 1988 struct src_register const_int = get_loop_const( emit ); 1989 1990 emit->dynamic_branching_level++; 1991 1992 return (emit_instruction( emit, inst ) && 1993 emit_src( emit, loop_reg ) && 1994 emit_src( emit, const_int ) ); 1995 } 1996 1997 1998 static boolean 1999 emit_endloop(struct svga_shader_emitter *emit, 2000 const struct tgsi_full_instruction *insn) 2001 { 2002 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP ); 2003 2004 emit->dynamic_branching_level--; 2005 2006 return emit_instruction( emit, inst ); 2007 } 2008 2009 2010 /** 2011 * Translate/emit TGSI BREAK (out of loop) instruction. 
2012 */ 2013 static boolean 2014 emit_brk(struct svga_shader_emitter *emit, 2015 const struct tgsi_full_instruction *insn) 2016 { 2017 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK ); 2018 return emit_instruction( emit, inst ); 2019 } 2020 2021 2022 /** 2023 * Emit simple instruction which operates on one scalar value (not 2024 * a vector). Ex: LG2, RCP, RSQ. 2025 */ 2026 static boolean 2027 emit_scalar_op1(struct svga_shader_emitter *emit, 2028 unsigned opcode, 2029 const struct tgsi_full_instruction *insn) 2030 { 2031 SVGA3dShaderInstToken inst; 2032 SVGA3dShaderDestToken dst; 2033 struct src_register src; 2034 2035 inst = inst_token( opcode ); 2036 dst = translate_dst_register( emit, insn, 0 ); 2037 src = translate_src_register( emit, &insn->Src[0] ); 2038 src = scalar( src, TGSI_SWIZZLE_X ); 2039 2040 return submit_op1( emit, inst, dst, src ); 2041 } 2042 2043 2044 /** 2045 * Translate/emit a simple instruction (one which has no special-case 2046 * code) such as ADD, MUL, MIN, MAX. 2047 */ 2048 static boolean 2049 emit_simple_instruction(struct svga_shader_emitter *emit, 2050 unsigned opcode, 2051 const struct tgsi_full_instruction *insn) 2052 { 2053 const struct tgsi_full_src_register *src = insn->Src; 2054 SVGA3dShaderInstToken inst; 2055 SVGA3dShaderDestToken dst; 2056 2057 inst = inst_token( opcode ); 2058 dst = translate_dst_register( emit, insn, 0 ); 2059 2060 switch (insn->Instruction.NumSrcRegs) { 2061 case 0: 2062 return submit_op0( emit, inst, dst ); 2063 case 1: 2064 return submit_op1( emit, inst, dst, 2065 translate_src_register( emit, &src[0] )); 2066 case 2: 2067 return submit_op2( emit, inst, dst, 2068 translate_src_register( emit, &src[0] ), 2069 translate_src_register( emit, &src[1] ) ); 2070 case 3: 2071 return submit_op3( emit, inst, dst, 2072 translate_src_register( emit, &src[0] ), 2073 translate_src_register( emit, &src[1] ), 2074 translate_src_register( emit, &src[2] ) ); 2075 default: 2076 assert(0); 2077 return FALSE; 2078 } 2079 } 2080 2081 2082 /** 2083 * TGSI_OPCODE_MOVE is only special-cased here to detect the 2084 * svga_fragment_shader::constant_color_output case. 2085 */ 2086 static boolean 2087 emit_mov(struct svga_shader_emitter *emit, 2088 const struct tgsi_full_instruction *insn) 2089 { 2090 const struct tgsi_full_src_register *src = &insn->Src[0]; 2091 const struct tgsi_full_dst_register *dst = &insn->Dst[0]; 2092 2093 if (emit->unit == PIPE_SHADER_FRAGMENT && 2094 dst->Register.File == TGSI_FILE_OUTPUT && 2095 dst->Register.Index == 0 && 2096 src->Register.File == TGSI_FILE_CONSTANT && 2097 !src->Register.Indirect) { 2098 emit->constant_color_output = TRUE; 2099 } 2100 2101 return emit_simple_instruction(emit, SVGA3DOP_MOV, insn); 2102 } 2103 2104 2105 /** 2106 * Translate/emit TGSI DDX, DDY instructions. 2107 */ 2108 static boolean 2109 emit_deriv(struct svga_shader_emitter *emit, 2110 const struct tgsi_full_instruction *insn ) 2111 { 2112 if (emit->dynamic_branching_level > 0 && 2113 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY) 2114 { 2115 SVGA3dShaderDestToken dst = 2116 translate_dst_register( emit, insn, 0 ); 2117 2118 /* Deriv opcodes not valid inside dynamic branching, workaround 2119 * by zeroing out the destination. 
2120 */ 2121 if (!submit_op1(emit, 2122 inst_token( SVGA3DOP_MOV ), 2123 dst, 2124 get_zero_immediate(emit))) 2125 return FALSE; 2126 2127 return TRUE; 2128 } 2129 else { 2130 unsigned opcode; 2131 const struct tgsi_full_src_register *reg = &insn->Src[0]; 2132 SVGA3dShaderInstToken inst; 2133 SVGA3dShaderDestToken dst; 2134 struct src_register src0; 2135 2136 switch (insn->Instruction.Opcode) { 2137 case TGSI_OPCODE_DDX: 2138 opcode = SVGA3DOP_DSX; 2139 break; 2140 case TGSI_OPCODE_DDY: 2141 opcode = SVGA3DOP_DSY; 2142 break; 2143 default: 2144 return FALSE; 2145 } 2146 2147 inst = inst_token( opcode ); 2148 dst = translate_dst_register( emit, insn, 0 ); 2149 src0 = translate_src_register( emit, reg ); 2150 2151 /* We cannot use negate or abs on source to dsx/dsy instruction. 2152 */ 2153 if (reg->Register.Absolute || 2154 reg->Register.Negate) { 2155 SVGA3dShaderDestToken temp = get_temp( emit ); 2156 2157 if (!emit_repl( emit, temp, &src0 )) 2158 return FALSE; 2159 } 2160 2161 return submit_op1( emit, inst, dst, src0 ); 2162 } 2163 } 2164 2165 2166 /** 2167 * Translate/emit ARL (Address Register Load) instruction. Used to 2168 * move a value into the special 'address' register. Used to implement 2169 * indirect/variable indexing into arrays. 2170 */ 2171 static boolean 2172 emit_arl(struct svga_shader_emitter *emit, 2173 const struct tgsi_full_instruction *insn) 2174 { 2175 ++emit->current_arl; 2176 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2177 /* MOVA not present in pixel shader instruction set. 2178 * Ignore this instruction altogether since it is 2179 * only used for loop counters -- and for that 2180 * we reference aL directly. 2181 */ 2182 return TRUE; 2183 } 2184 if (svga_arl_needs_adjustment( emit )) { 2185 return emit_fake_arl( emit, insn ); 2186 } else { 2187 /* no need to adjust, just emit straight arl */ 2188 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn); 2189 } 2190 } 2191 2192 2193 static boolean 2194 emit_pow(struct svga_shader_emitter *emit, 2195 const struct tgsi_full_instruction *insn) 2196 { 2197 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2198 struct src_register src0 = translate_src_register( 2199 emit, &insn->Src[0] ); 2200 struct src_register src1 = translate_src_register( 2201 emit, &insn->Src[1] ); 2202 boolean need_tmp = FALSE; 2203 2204 /* POW can only output to a temporary */ 2205 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY) 2206 need_tmp = TRUE; 2207 2208 /* POW src1 must not be the same register as dst */ 2209 if (alias_src_dst( src1, dst )) 2210 need_tmp = TRUE; 2211 2212 /* it's a scalar op */ 2213 src0 = scalar( src0, TGSI_SWIZZLE_X ); 2214 src1 = scalar( src1, TGSI_SWIZZLE_X ); 2215 2216 if (need_tmp) { 2217 SVGA3dShaderDestToken tmp = 2218 writemask(get_temp( emit ), TGSI_WRITEMASK_X ); 2219 2220 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1)) 2221 return FALSE; 2222 2223 return submit_op1(emit, inst_token( SVGA3DOP_MOV ), 2224 dst, scalar(src(tmp), 0) ); 2225 } 2226 else { 2227 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1); 2228 } 2229 } 2230 2231 2232 /** 2233 * Translate/emit TGSI XPD (vector cross product) instruction. 
2234 */ 2235 static boolean 2236 emit_xpd(struct svga_shader_emitter *emit, 2237 const struct tgsi_full_instruction *insn) 2238 { 2239 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2240 const struct src_register src0 = translate_src_register( 2241 emit, &insn->Src[0] ); 2242 const struct src_register src1 = translate_src_register( 2243 emit, &insn->Src[1] ); 2244 boolean need_dst_tmp = FALSE; 2245 2246 /* XPD can only output to a temporary */ 2247 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP) 2248 need_dst_tmp = TRUE; 2249 2250 /* The dst reg must not be the same as src0 or src1*/ 2251 if (alias_src_dst(src0, dst) || 2252 alias_src_dst(src1, dst)) 2253 need_dst_tmp = TRUE; 2254 2255 if (need_dst_tmp) { 2256 SVGA3dShaderDestToken tmp = get_temp( emit ); 2257 2258 /* Obey DX9 restrictions on mask: 2259 */ 2260 tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ; 2261 2262 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1)) 2263 return FALSE; 2264 2265 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) 2266 return FALSE; 2267 } 2268 else { 2269 if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1)) 2270 return FALSE; 2271 } 2272 2273 /* Need to emit 1.0 to dst.w? 2274 */ 2275 if (dst.mask & TGSI_WRITEMASK_W) { 2276 struct src_register one = get_one_immediate( emit ); 2277 2278 if (!submit_op1(emit, 2279 inst_token( SVGA3DOP_MOV ), 2280 writemask(dst, TGSI_WRITEMASK_W), 2281 one)) 2282 return FALSE; 2283 } 2284 2285 return TRUE; 2286 } 2287 2288 2289 /** 2290 * Emit a LRP (linear interpolation) instruction. 2291 */ 2292 static boolean 2293 submit_lrp(struct svga_shader_emitter *emit, 2294 SVGA3dShaderDestToken dst, 2295 struct src_register src0, 2296 struct src_register src1, 2297 struct src_register src2) 2298 { 2299 SVGA3dShaderDestToken tmp; 2300 boolean need_dst_tmp = FALSE; 2301 2302 /* The dst reg must be a temporary, and not be the same as src0 or src2 */ 2303 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || 2304 alias_src_dst(src0, dst) || 2305 alias_src_dst(src2, dst)) 2306 need_dst_tmp = TRUE; 2307 2308 if (need_dst_tmp) { 2309 tmp = get_temp( emit ); 2310 tmp.mask = dst.mask; 2311 } 2312 else { 2313 tmp = dst; 2314 } 2315 2316 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) 2317 return FALSE; 2318 2319 if (need_dst_tmp) { 2320 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) 2321 return FALSE; 2322 } 2323 2324 return TRUE; 2325 } 2326 2327 2328 /** 2329 * Translate/emit LRP (Linear Interpolation) instruction. 2330 */ 2331 static boolean 2332 emit_lrp(struct svga_shader_emitter *emit, 2333 const struct tgsi_full_instruction *insn) 2334 { 2335 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2336 const struct src_register src0 = translate_src_register( 2337 emit, &insn->Src[0] ); 2338 const struct src_register src1 = translate_src_register( 2339 emit, &insn->Src[1] ); 2340 const struct src_register src2 = translate_src_register( 2341 emit, &insn->Src[2] ); 2342 2343 return submit_lrp(emit, dst, src0, src1, src2); 2344 } 2345 2346 /** 2347 * Translate/emit DST (Distance function) instruction. 
2348 */ 2349 static boolean 2350 emit_dst_insn(struct svga_shader_emitter *emit, 2351 const struct tgsi_full_instruction *insn) 2352 { 2353 if (emit->unit == PIPE_SHADER_VERTEX) { 2354 /* SVGA/DX9 has a DST instruction, but only for vertex shaders: 2355 */ 2356 return emit_simple_instruction(emit, SVGA3DOP_DST, insn); 2357 } 2358 else { 2359 /* result[0] = 1 * 1; 2360 * result[1] = a[1] * b[1]; 2361 * result[2] = a[2] * 1; 2362 * result[3] = 1 * b[3]; 2363 */ 2364 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2365 SVGA3dShaderDestToken tmp; 2366 const struct src_register src0 = translate_src_register( 2367 emit, &insn->Src[0] ); 2368 const struct src_register src1 = translate_src_register( 2369 emit, &insn->Src[1] ); 2370 boolean need_tmp = FALSE; 2371 2372 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || 2373 alias_src_dst(src0, dst) || 2374 alias_src_dst(src1, dst)) 2375 need_tmp = TRUE; 2376 2377 if (need_tmp) { 2378 tmp = get_temp( emit ); 2379 } 2380 else { 2381 tmp = dst; 2382 } 2383 2384 /* tmp.xw = 1.0 2385 */ 2386 if (tmp.mask & TGSI_WRITEMASK_XW) { 2387 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2388 writemask(tmp, TGSI_WRITEMASK_XW ), 2389 get_one_immediate(emit))) 2390 return FALSE; 2391 } 2392 2393 /* tmp.yz = src0 2394 */ 2395 if (tmp.mask & TGSI_WRITEMASK_YZ) { 2396 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2397 writemask(tmp, TGSI_WRITEMASK_YZ ), 2398 src0)) 2399 return FALSE; 2400 } 2401 2402 /* tmp.yw = tmp * src1 2403 */ 2404 if (tmp.mask & TGSI_WRITEMASK_YW) { 2405 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 2406 writemask(tmp, TGSI_WRITEMASK_YW ), 2407 src(tmp), 2408 src1)) 2409 return FALSE; 2410 } 2411 2412 /* dst = tmp 2413 */ 2414 if (need_tmp) { 2415 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2416 dst, 2417 src(tmp))) 2418 return FALSE; 2419 } 2420 } 2421 2422 return TRUE; 2423 } 2424 2425 2426 static boolean 2427 emit_exp(struct svga_shader_emitter *emit, 2428 const struct tgsi_full_instruction *insn) 2429 { 2430 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2431 struct src_register src0 = 2432 translate_src_register( emit, &insn->Src[0] ); 2433 SVGA3dShaderDestToken fraction; 2434 2435 if (dst.mask & TGSI_WRITEMASK_Y) 2436 fraction = dst; 2437 else if (dst.mask & TGSI_WRITEMASK_X) 2438 fraction = get_temp( emit ); 2439 else 2440 fraction.value = 0; 2441 2442 /* If y is being written, fill it with src0 - floor(src0). 2443 */ 2444 if (dst.mask & TGSI_WRITEMASK_XY) { 2445 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), 2446 writemask( fraction, TGSI_WRITEMASK_Y ), 2447 src0 )) 2448 return FALSE; 2449 } 2450 2451 /* If x is being written, fill it with 2 ^ floor(src0). 2452 */ 2453 if (dst.mask & TGSI_WRITEMASK_X) { 2454 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), 2455 writemask( dst, TGSI_WRITEMASK_X ), 2456 src0, 2457 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) ) 2458 return FALSE; 2459 2460 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ), 2461 writemask( dst, TGSI_WRITEMASK_X ), 2462 scalar( src( dst ), TGSI_SWIZZLE_X ) ) ) 2463 return FALSE; 2464 2465 if (!(dst.mask & TGSI_WRITEMASK_Y)) 2466 release_temp( emit, fraction ); 2467 } 2468 2469 /* If z is being written, fill it with 2 ^ src0 (partial precision). 2470 */ 2471 if (dst.mask & TGSI_WRITEMASK_Z) { 2472 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ), 2473 writemask( dst, TGSI_WRITEMASK_Z ), 2474 src0 ) ) 2475 return FALSE; 2476 } 2477 2478 /* If w is being written, fill it with one. 
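    *
    * (As a rough overall example, with an illustrative input of src0.x = 2.7,
    * the EXP expansion implemented in this function produces
    * x = 2^floor(2.7) = 4, y = 2.7 - 2 = 0.7, z ~= 2^2.7 ~= 6.5, w = 1.
    * The value is chosen here purely for illustration.)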
    */
   if (dst.mask & TGSI_WRITEMASK_W) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                       writemask(dst, TGSI_WRITEMASK_W),
                       get_one_immediate(emit)))
         return FALSE;
   }

   return TRUE;
}


/**
 * Translate/emit LIT (Lighting helper) instruction.
 */
static boolean
emit_lit(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
       */
      return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
   }
   else {
      /* D3D vs. GL semantics can be fairly easily accommodated by
       * variations on this sequence.
       *
       * GL:
       *   tmp.y = src.x
       *   tmp.z = pow(src.y,src.w)
       *   p0 = src0.xxxx > 0
       *   result = zero.wxxw
       *   (p0) result.yz = tmp
       *
       * D3D:
       *   tmp.y = src.x
       *   tmp.z = pow(src.y,src.w)
       *   p0 = src0.xxyy > 0
       *   result = zero.wxxw
       *   (p0) result.yz = tmp
       *
       * Will implement the GL version for now.
       */
      SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
      SVGA3dShaderDestToken tmp = get_temp( emit );
      const struct src_register src0 = translate_src_register(
         emit, &insn->Src[0] );

      /* tmp = pow(src.y, src.w)
       */
      if (dst.mask & TGSI_WRITEMASK_Z) {
         if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
                         tmp,
                         scalar(src0, 1),
                         scalar(src0, 3)))
            return FALSE;
      }

      /* tmp.y = src.x
       */
      if (dst.mask & TGSI_WRITEMASK_Y) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          writemask(tmp, TGSI_WRITEMASK_Y ),
                          scalar(src0, 0)))
            return FALSE;
      }

      /* Can't quite do this with emit conditional due to the extra
       * writemask on the predicated mov:
       */
      {
         SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
         struct src_register predsrc;

         /* D3D vs GL semantics:
          */
         if (0)
            predsrc = swizzle(src0, 0, 0, 1, 1);  /* D3D */
         else
            predsrc = swizzle(src0, 0, 0, 0, 0);  /* GL */

         /* SETP src0.xxyy, GT, {0}.x */
         if (!submit_op2( emit,
                          inst_token_setp(SVGA3DOPCOMP_GT),
                          pred_reg,
                          predsrc,
                          get_zero_immediate(emit)))
            return FALSE;

         /* MOV dst, fail */
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
                          get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f)))
            return FALSE;

         /* MOV dst.yz, tmp (predicated)
          *
          * Note that the predicate reg (and possible modifiers) is passed
          * as the first source argument.
2578 */ 2579 if (dst.mask & TGSI_WRITEMASK_YZ) { 2580 if (!submit_op2( emit, 2581 inst_token_predicated(SVGA3DOP_MOV), 2582 writemask(dst, TGSI_WRITEMASK_YZ), 2583 src( pred_reg ), src( tmp ) )) 2584 return FALSE; 2585 } 2586 } 2587 } 2588 2589 return TRUE; 2590 } 2591 2592 2593 static boolean 2594 emit_ex2(struct svga_shader_emitter *emit, 2595 const struct tgsi_full_instruction *insn) 2596 { 2597 SVGA3dShaderInstToken inst; 2598 SVGA3dShaderDestToken dst; 2599 struct src_register src0; 2600 2601 inst = inst_token( SVGA3DOP_EXP ); 2602 dst = translate_dst_register( emit, insn, 0 ); 2603 src0 = translate_src_register( emit, &insn->Src[0] ); 2604 src0 = scalar( src0, TGSI_SWIZZLE_X ); 2605 2606 if (dst.mask != TGSI_WRITEMASK_XYZW) { 2607 SVGA3dShaderDestToken tmp = get_temp( emit ); 2608 2609 if (!submit_op1( emit, inst, tmp, src0 )) 2610 return FALSE; 2611 2612 return submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2613 dst, 2614 scalar( src( tmp ), TGSI_SWIZZLE_X ) ); 2615 } 2616 2617 return submit_op1( emit, inst, dst, src0 ); 2618 } 2619 2620 2621 static boolean 2622 emit_log(struct svga_shader_emitter *emit, 2623 const struct tgsi_full_instruction *insn) 2624 { 2625 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2626 struct src_register src0 = 2627 translate_src_register( emit, &insn->Src[0] ); 2628 SVGA3dShaderDestToken abs_tmp; 2629 struct src_register abs_src0; 2630 SVGA3dShaderDestToken log2_abs; 2631 2632 abs_tmp.value = 0; 2633 2634 if (dst.mask & TGSI_WRITEMASK_Z) 2635 log2_abs = dst; 2636 else if (dst.mask & TGSI_WRITEMASK_XY) 2637 log2_abs = get_temp( emit ); 2638 else 2639 log2_abs.value = 0; 2640 2641 /* If z is being written, fill it with log2( abs( src0 ) ). 2642 */ 2643 if (dst.mask & TGSI_WRITEMASK_XYZ) { 2644 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS) 2645 abs_src0 = src0; 2646 else { 2647 abs_tmp = get_temp( emit ); 2648 2649 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2650 abs_tmp, 2651 src0 ) ) 2652 return FALSE; 2653 2654 abs_src0 = src( abs_tmp ); 2655 } 2656 2657 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) ); 2658 2659 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ), 2660 writemask( log2_abs, TGSI_WRITEMASK_Z ), 2661 abs_src0 ) ) 2662 return FALSE; 2663 } 2664 2665 if (dst.mask & TGSI_WRITEMASK_XY) { 2666 SVGA3dShaderDestToken floor_log2; 2667 2668 if (dst.mask & TGSI_WRITEMASK_X) 2669 floor_log2 = dst; 2670 else 2671 floor_log2 = get_temp( emit ); 2672 2673 /* If x is being written, fill it with floor( log2( abs( src0 ) ) ). 2674 */ 2675 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), 2676 writemask( floor_log2, TGSI_WRITEMASK_X ), 2677 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) ) 2678 return FALSE; 2679 2680 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), 2681 writemask( floor_log2, TGSI_WRITEMASK_X ), 2682 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ), 2683 negate( src( floor_log2 ) ) ) ) 2684 return FALSE; 2685 2686 /* If y is being written, fill it with 2687 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ). 
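       *
       * As a rough worked example (the value is illustrative only, not taken
       * from the code): for src0.x = 10.0, log2(10) is about 3.32, so
       * z = 3.32, x = floor(3.32) = 3, and y = 10 / 2^3 = 1.25.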
2688 */ 2689 if (dst.mask & TGSI_WRITEMASK_Y) { 2690 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ), 2691 writemask( dst, TGSI_WRITEMASK_Y ), 2692 negate( scalar( src( floor_log2 ), 2693 TGSI_SWIZZLE_X ) ) ) ) 2694 return FALSE; 2695 2696 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 2697 writemask( dst, TGSI_WRITEMASK_Y ), 2698 src( dst ), 2699 abs_src0 ) ) 2700 return FALSE; 2701 } 2702 2703 if (!(dst.mask & TGSI_WRITEMASK_X)) 2704 release_temp( emit, floor_log2 ); 2705 2706 if (!(dst.mask & TGSI_WRITEMASK_Z)) 2707 release_temp( emit, log2_abs ); 2708 } 2709 2710 if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod && 2711 src0.base.srcMod != SVGA3DSRCMOD_ABS) 2712 release_temp( emit, abs_tmp ); 2713 2714 /* If w is being written, fill it with one. 2715 */ 2716 if (dst.mask & TGSI_WRITEMASK_W) { 2717 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2718 writemask(dst, TGSI_WRITEMASK_W), 2719 get_one_immediate(emit))) 2720 return FALSE; 2721 } 2722 2723 return TRUE; 2724 } 2725 2726 2727 /** 2728 * Translate TGSI TRUNC or ROUND instruction. 2729 * We need to truncate toward zero. Ex: trunc(-1.9) = -1 2730 * Different approaches are needed for VS versus PS. 2731 */ 2732 static boolean 2733 emit_trunc_round(struct svga_shader_emitter *emit, 2734 const struct tgsi_full_instruction *insn, 2735 boolean round) 2736 { 2737 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); 2738 const struct src_register src0 = 2739 translate_src_register(emit, &insn->Src[0] ); 2740 SVGA3dShaderDestToken t1 = get_temp(emit); 2741 2742 if (round) { 2743 SVGA3dShaderDestToken t0 = get_temp(emit); 2744 struct src_register half = get_half_immediate(emit); 2745 2746 /* t0 = abs(src0) + 0.5 */ 2747 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0, 2748 absolute(src0), half)) 2749 return FALSE; 2750 2751 /* t1 = fract(t0) */ 2752 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0))) 2753 return FALSE; 2754 2755 /* t1 = t0 - t1 */ 2756 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0), 2757 negate(src(t1)))) 2758 return FALSE; 2759 } 2760 else { 2761 /* trunc */ 2762 2763 /* t1 = fract(abs(src0)) */ 2764 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0))) 2765 return FALSE; 2766 2767 /* t1 = abs(src0) - t1 */ 2768 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0), 2769 negate(src(t1)))) 2770 return FALSE; 2771 } 2772 2773 /* 2774 * Now we need to multiply t1 by the sign of the original value. 2775 */ 2776 if (emit->unit == PIPE_SHADER_VERTEX) { 2777 /* For VS: use SGN instruction */ 2778 /* Need two extra/dummy registers: */ 2779 SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit), 2780 t4 = get_temp(emit); 2781 2782 /* t2 = sign(src0) */ 2783 if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0, 2784 src(t3), src(t4))) 2785 return FALSE; 2786 2787 /* dst = t1 * t2 */ 2788 if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2))) 2789 return FALSE; 2790 } 2791 else { 2792 /* For FS: Use CMP instruction */ 2793 return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst, 2794 src0, src(t1), negate(src(t1))); 2795 } 2796 2797 return TRUE; 2798 } 2799 2800 2801 /** 2802 * Translate/emit "begin subroutine" instruction/marker/label. 2803 */ 2804 static boolean 2805 emit_bgnsub(struct svga_shader_emitter *emit, 2806 unsigned position, 2807 const struct tgsi_full_instruction *insn) 2808 { 2809 unsigned i; 2810 2811 /* Note that we've finished the main function and are now emitting 2812 * subroutines. 
This affects how we terminate the generated 2813 * shader. 2814 */ 2815 emit->in_main_func = FALSE; 2816 2817 for (i = 0; i < emit->nr_labels; i++) { 2818 if (emit->label[i] == position) { 2819 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) && 2820 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) && 2821 emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); 2822 } 2823 } 2824 2825 assert(0); 2826 return TRUE; 2827 } 2828 2829 2830 /** 2831 * Translate/emit subroutine call instruction. 2832 */ 2833 static boolean 2834 emit_call(struct svga_shader_emitter *emit, 2835 const struct tgsi_full_instruction *insn) 2836 { 2837 unsigned position = insn->Label.Label; 2838 unsigned i; 2839 2840 for (i = 0; i < emit->nr_labels; i++) { 2841 if (emit->label[i] == position) 2842 break; 2843 } 2844 2845 if (emit->nr_labels == ARRAY_SIZE(emit->label)) 2846 return FALSE; 2847 2848 if (i == emit->nr_labels) { 2849 emit->label[i] = position; 2850 emit->nr_labels++; 2851 } 2852 2853 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) && 2854 emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); 2855 } 2856 2857 2858 /** 2859 * Called at the end of the shader. Actually, emit special "fix-up" 2860 * code for the vertex/fragment shader. 2861 */ 2862 static boolean 2863 emit_end(struct svga_shader_emitter *emit) 2864 { 2865 if (emit->unit == PIPE_SHADER_VERTEX) { 2866 return emit_vs_postamble( emit ); 2867 } 2868 else { 2869 return emit_ps_postamble( emit ); 2870 } 2871 } 2872 2873 2874 /** 2875 * Translate any TGSI instruction to SVGA. 2876 */ 2877 static boolean 2878 svga_emit_instruction(struct svga_shader_emitter *emit, 2879 unsigned position, 2880 const struct tgsi_full_instruction *insn) 2881 { 2882 switch (insn->Instruction.Opcode) { 2883 2884 case TGSI_OPCODE_ARL: 2885 return emit_arl( emit, insn ); 2886 2887 case TGSI_OPCODE_TEX: 2888 case TGSI_OPCODE_TXB: 2889 case TGSI_OPCODE_TXP: 2890 case TGSI_OPCODE_TXL: 2891 case TGSI_OPCODE_TXD: 2892 return emit_tex( emit, insn ); 2893 2894 case TGSI_OPCODE_DDX: 2895 case TGSI_OPCODE_DDY: 2896 return emit_deriv( emit, insn ); 2897 2898 case TGSI_OPCODE_BGNSUB: 2899 return emit_bgnsub( emit, position, insn ); 2900 2901 case TGSI_OPCODE_ENDSUB: 2902 return TRUE; 2903 2904 case TGSI_OPCODE_CAL: 2905 return emit_call( emit, insn ); 2906 2907 case TGSI_OPCODE_FLR: 2908 return emit_floor( emit, insn ); 2909 2910 case TGSI_OPCODE_TRUNC: 2911 return emit_trunc_round( emit, insn, FALSE ); 2912 2913 case TGSI_OPCODE_ROUND: 2914 return emit_trunc_round( emit, insn, TRUE ); 2915 2916 case TGSI_OPCODE_CEIL: 2917 return emit_ceil( emit, insn ); 2918 2919 case TGSI_OPCODE_CMP: 2920 return emit_cmp( emit, insn ); 2921 2922 case TGSI_OPCODE_DIV: 2923 return emit_div( emit, insn ); 2924 2925 case TGSI_OPCODE_DP2: 2926 return emit_dp2( emit, insn ); 2927 2928 case TGSI_OPCODE_DPH: 2929 return emit_dph( emit, insn ); 2930 2931 case TGSI_OPCODE_COS: 2932 return emit_cos( emit, insn ); 2933 2934 case TGSI_OPCODE_SIN: 2935 return emit_sin( emit, insn ); 2936 2937 case TGSI_OPCODE_SCS: 2938 return emit_sincos( emit, insn ); 2939 2940 case TGSI_OPCODE_END: 2941 /* TGSI always finishes the main func with an END */ 2942 return emit_end( emit ); 2943 2944 case TGSI_OPCODE_KILL_IF: 2945 return emit_kill_if( emit, insn ); 2946 2947 /* Selection opcodes. The underlying language is fairly 2948 * non-orthogonal about these. 
2949 */ 2950 case TGSI_OPCODE_SEQ: 2951 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn ); 2952 2953 case TGSI_OPCODE_SNE: 2954 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn ); 2955 2956 case TGSI_OPCODE_SGT: 2957 return emit_select_op( emit, PIPE_FUNC_GREATER, insn ); 2958 2959 case TGSI_OPCODE_SGE: 2960 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn ); 2961 2962 case TGSI_OPCODE_SLT: 2963 return emit_select_op( emit, PIPE_FUNC_LESS, insn ); 2964 2965 case TGSI_OPCODE_SLE: 2966 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn ); 2967 2968 case TGSI_OPCODE_POW: 2969 return emit_pow( emit, insn ); 2970 2971 case TGSI_OPCODE_EX2: 2972 return emit_ex2( emit, insn ); 2973 2974 case TGSI_OPCODE_EXP: 2975 return emit_exp( emit, insn ); 2976 2977 case TGSI_OPCODE_LOG: 2978 return emit_log( emit, insn ); 2979 2980 case TGSI_OPCODE_LG2: 2981 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn ); 2982 2983 case TGSI_OPCODE_RSQ: 2984 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn ); 2985 2986 case TGSI_OPCODE_RCP: 2987 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn ); 2988 2989 case TGSI_OPCODE_CONT: 2990 /* not expected (we return PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED = 0) */ 2991 return FALSE; 2992 2993 case TGSI_OPCODE_RET: 2994 /* This is a noop -- we tell mesa that we can't support RET 2995 * within a function (early return), so this will always be 2996 * followed by an ENDSUB. 2997 */ 2998 return TRUE; 2999 3000 /* These aren't actually used by any of the frontends we care 3001 * about: 3002 */ 3003 case TGSI_OPCODE_CLAMP: 3004 case TGSI_OPCODE_AND: 3005 case TGSI_OPCODE_OR: 3006 case TGSI_OPCODE_I2F: 3007 case TGSI_OPCODE_NOT: 3008 case TGSI_OPCODE_SHL: 3009 case TGSI_OPCODE_ISHR: 3010 case TGSI_OPCODE_XOR: 3011 return FALSE; 3012 3013 case TGSI_OPCODE_IF: 3014 return emit_if( emit, insn ); 3015 case TGSI_OPCODE_ELSE: 3016 return emit_else( emit, insn ); 3017 case TGSI_OPCODE_ENDIF: 3018 return emit_endif( emit, insn ); 3019 3020 case TGSI_OPCODE_BGNLOOP: 3021 return emit_bgnloop( emit, insn ); 3022 case TGSI_OPCODE_ENDLOOP: 3023 return emit_endloop( emit, insn ); 3024 case TGSI_OPCODE_BRK: 3025 return emit_brk( emit, insn ); 3026 3027 case TGSI_OPCODE_XPD: 3028 return emit_xpd( emit, insn ); 3029 3030 case TGSI_OPCODE_KILL: 3031 return emit_kill( emit, insn ); 3032 3033 case TGSI_OPCODE_DST: 3034 return emit_dst_insn( emit, insn ); 3035 3036 case TGSI_OPCODE_LIT: 3037 return emit_lit( emit, insn ); 3038 3039 case TGSI_OPCODE_LRP: 3040 return emit_lrp( emit, insn ); 3041 3042 case TGSI_OPCODE_SSG: 3043 return emit_ssg( emit, insn ); 3044 3045 case TGSI_OPCODE_MOV: 3046 return emit_mov( emit, insn ); 3047 3048 default: 3049 { 3050 unsigned opcode = translate_opcode(insn->Instruction.Opcode); 3051 3052 if (opcode == SVGA3DOP_LAST_INST) 3053 return FALSE; 3054 3055 if (!emit_simple_instruction( emit, opcode, insn )) 3056 return FALSE; 3057 } 3058 } 3059 3060 return TRUE; 3061 } 3062 3063 3064 /** 3065 * Translate/emit a TGSI IMMEDIATE declaration. 3066 * An immediate vector is a constant that's hard-coded into the shader. 3067 */ 3068 static boolean 3069 svga_emit_immediate(struct svga_shader_emitter *emit, 3070 const struct tgsi_full_immediate *imm) 3071 { 3072 static const float id[4] = {0,0,0,1}; 3073 float value[4]; 3074 unsigned i; 3075 3076 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5); 3077 for (i = 0; i < imm->Immediate.NrTokens - 1; i++) { 3078 float f = imm->u[i].Float; 3079 value[i] = util_is_inf_or_nan(f) ? 
         0.0f : f;
   }

   /* If the immediate has fewer than four values, fill in the remaining
    * positions from id={0,0,0,1}.
    */
   for ( ; i < 4; i++ )
      value[i] = id[i];

   return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
                          emit->imm_start + emit->internal_imm_count++,
                          value[0], value[1], value[2], value[3]);
}


/**
 * Allocate the next hardware float constant register, emit its
 * definition with the given values and return it in 'out'.
 */
static boolean
make_immediate(struct svga_shader_emitter *emit,
               float a, float b, float c, float d,
               struct src_register *out )
{
   unsigned idx = emit->nr_hw_float_const++;

   if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
                        idx, a, b, c, d ))
      return FALSE;

   *out = src_register( SVGA3DREG_CONST, idx );

   return TRUE;
}


/**
 * Emit special VS instructions at top of shader.
 */
static boolean
emit_vs_preamble(struct svga_shader_emitter *emit)
{
   if (!emit->key.vs.need_prescale) {
      if (!make_immediate( emit, 0, 0, .5, .5,
                           &emit->imm_0055))
         return FALSE;
   }

   return TRUE;
}


/**
 * Emit special PS instructions at top of shader.
 */
static boolean
emit_ps_preamble(struct svga_shader_emitter *emit)
{
   if (emit->ps_reads_pos && emit->info.reads_z) {
      /*
       * Assemble the position from various bits of inputs. Depth and W are
       * passed in a texcoord because D3D's vPos does not hold Z or W.
       * Also fix up the perspective interpolation.
       *
       * temp_pos.xy = vPos.xy
       * temp_pos.w = rcp(texcoord1.w);
       * temp_pos.z = texcoord1.z * temp_pos.w;
       */
      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_MOV),
                       writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
                       emit->ps_true_pos ))
         return FALSE;

      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_RCP),
                       writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
                       scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
         return FALSE;

      if (!submit_op2( emit,
                       inst_token(SVGA3DOP_MUL),
                       writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
                       scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
                       scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
         return FALSE;
   }

   return TRUE;
}


/**
 * Emit special PS instructions at end of shader.
 */
static boolean
emit_ps_postamble(struct svga_shader_emitter *emit)
{
   unsigned i;

   /* PS oDepth is incredibly fragile and it's very hard to catch the
    * types of usage that break it during shader emit.  Easier just to
    * redirect the main program to a temporary and then only touch
    * oDepth with a hand-crafted MOV below.
    */
   if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_MOV),
                       emit->true_pos,
                       scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
         return FALSE;
   }

   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
      if (SVGA3dShaderGetRegType(emit->true_color_output[i].value) != 0) {
         /* Potentially override output colors with white for XOR
          * logicop workaround.
3192 */ 3193 if (emit->unit == PIPE_SHADER_FRAGMENT && 3194 emit->key.fs.white_fragments) { 3195 struct src_register one = get_one_immediate(emit); 3196 3197 if (!submit_op1( emit, 3198 inst_token(SVGA3DOP_MOV), 3199 emit->true_color_output[i], 3200 one )) 3201 return FALSE; 3202 } 3203 else if (emit->unit == PIPE_SHADER_FRAGMENT && 3204 i < emit->key.fs.write_color0_to_n_cbufs) { 3205 /* Write temp color output [0] to true output [i] */ 3206 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), 3207 emit->true_color_output[i], 3208 src(emit->temp_color_output[0]))) { 3209 return FALSE; 3210 } 3211 } 3212 else { 3213 if (!submit_op1( emit, 3214 inst_token(SVGA3DOP_MOV), 3215 emit->true_color_output[i], 3216 src(emit->temp_color_output[i]) )) 3217 return FALSE; 3218 } 3219 } 3220 } 3221 3222 return TRUE; 3223 } 3224 3225 3226 /** 3227 * Emit special VS instructions at end of shader. 3228 */ 3229 static boolean 3230 emit_vs_postamble(struct svga_shader_emitter *emit) 3231 { 3232 /* PSIZ output is incredibly fragile and it's very hard to catch 3233 * the types of usage that break it during shader emit. Easier 3234 * just to redirect the main program to a temporary and then only 3235 * touch PSIZ with a hand-crafted MOV below. 3236 */ 3237 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) { 3238 if (!submit_op1( emit, 3239 inst_token(SVGA3DOP_MOV), 3240 emit->true_psiz, 3241 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) )) 3242 return FALSE; 3243 } 3244 3245 /* Need to perform various manipulations on vertex position to cope 3246 * with the different GL and D3D clip spaces. 3247 */ 3248 if (emit->key.vs.need_prescale) { 3249 SVGA3dShaderDestToken temp_pos = emit->temp_pos; 3250 SVGA3dShaderDestToken depth = emit->depth_pos; 3251 SVGA3dShaderDestToken pos = emit->true_pos; 3252 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1; 3253 struct src_register prescale_scale = src_register( SVGA3DREG_CONST, 3254 offset + 0 ); 3255 struct src_register prescale_trans = src_register( SVGA3DREG_CONST, 3256 offset + 1 ); 3257 3258 if (!submit_op1( emit, 3259 inst_token(SVGA3DOP_MOV), 3260 writemask(depth, TGSI_WRITEMASK_W), 3261 scalar(src(temp_pos), TGSI_SWIZZLE_W) )) 3262 return FALSE; 3263 3264 /* MUL temp_pos.xyz, temp_pos, prescale.scale 3265 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos 3266 * --> Note that prescale.trans.w == 0 3267 */ 3268 if (!submit_op2( emit, 3269 inst_token(SVGA3DOP_MUL), 3270 writemask(temp_pos, TGSI_WRITEMASK_XYZ), 3271 src(temp_pos), 3272 prescale_scale )) 3273 return FALSE; 3274 3275 if (!submit_op3( emit, 3276 inst_token(SVGA3DOP_MAD), 3277 pos, 3278 swizzle(src(temp_pos), 3, 3, 3, 3), 3279 prescale_trans, 3280 src(temp_pos))) 3281 return FALSE; 3282 3283 /* Also write to depth value */ 3284 if (!submit_op3( emit, 3285 inst_token(SVGA3DOP_MAD), 3286 writemask(depth, TGSI_WRITEMASK_Z), 3287 swizzle(src(temp_pos), 3, 3, 3, 3), 3288 prescale_trans, 3289 src(temp_pos) )) 3290 return FALSE; 3291 } 3292 else { 3293 SVGA3dShaderDestToken temp_pos = emit->temp_pos; 3294 SVGA3dShaderDestToken depth = emit->depth_pos; 3295 SVGA3dShaderDestToken pos = emit->true_pos; 3296 struct src_register imm_0055 = emit->imm_0055; 3297 3298 /* Adjust GL clipping coordinate space to hardware (D3D-style): 3299 * 3300 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos 3301 * MOV result.position, temp_pos 3302 */ 3303 if (!submit_op2( emit, 3304 inst_token(SVGA3DOP_DP4), 3305 writemask(temp_pos, TGSI_WRITEMASK_Z), 3306 imm_0055, 3307 src(temp_pos) )) 3308 return FALSE; 3309 3310 if 
(!submit_op1( emit, 3311 inst_token(SVGA3DOP_MOV), 3312 pos, 3313 src(temp_pos) )) 3314 return FALSE; 3315 3316 /* Move the manipulated depth into the extra texcoord reg */ 3317 if (!submit_op1( emit, 3318 inst_token(SVGA3DOP_MOV), 3319 writemask(depth, TGSI_WRITEMASK_ZW), 3320 src(temp_pos) )) 3321 return FALSE; 3322 } 3323 3324 return TRUE; 3325 } 3326 3327 3328 /** 3329 * For the pixel shader: emit the code which chooses the front 3330 * or back face color depending on triangle orientation. 3331 * This happens at the top of the fragment shader. 3332 * 3333 * 0: IF VFACE :4 3334 * 1: COLOR = FrontColor; 3335 * 2: ELSE 3336 * 3: COLOR = BackColor; 3337 * 4: ENDIF 3338 */ 3339 static boolean 3340 emit_light_twoside(struct svga_shader_emitter *emit) 3341 { 3342 struct src_register vface, zero; 3343 struct src_register front[2]; 3344 struct src_register back[2]; 3345 SVGA3dShaderDestToken color[2]; 3346 int count = emit->internal_color_count; 3347 unsigned i; 3348 SVGA3dShaderInstToken if_token; 3349 3350 if (count == 0) 3351 return TRUE; 3352 3353 vface = get_vface( emit ); 3354 zero = get_zero_immediate(emit); 3355 3356 /* Can't use get_temp() to allocate the color reg as such 3357 * temporaries will be reclaimed after each instruction by the call 3358 * to reset_temp_regs(). 3359 */ 3360 for (i = 0; i < count; i++) { 3361 color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); 3362 front[i] = emit->input_map[emit->internal_color_idx[i]]; 3363 3364 /* Back is always the next input: 3365 */ 3366 back[i] = front[i]; 3367 back[i].base.num = front[i].base.num + 1; 3368 3369 /* Reassign the input_map to the actual front-face color: 3370 */ 3371 emit->input_map[emit->internal_color_idx[i]] = src(color[i]); 3372 } 3373 3374 if_token = inst_token( SVGA3DOP_IFC ); 3375 3376 if (emit->key.fs.front_ccw) 3377 if_token.control = SVGA3DOPCOMP_LT; 3378 else 3379 if_token.control = SVGA3DOPCOMP_GT; 3380 3381 if (!(emit_instruction( emit, if_token ) && 3382 emit_src( emit, vface ) && 3383 emit_src( emit, zero ) )) 3384 return FALSE; 3385 3386 for (i = 0; i < count; i++) { 3387 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] )) 3388 return FALSE; 3389 } 3390 3391 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE)))) 3392 return FALSE; 3393 3394 for (i = 0; i < count; i++) { 3395 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] )) 3396 return FALSE; 3397 } 3398 3399 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) )) 3400 return FALSE; 3401 3402 return TRUE; 3403 } 3404 3405 3406 /** 3407 * Emit special setup code for the front/back face register in the FS. 3408 * 0: SETP_GT TEMP, VFACE, 0 3409 * where TEMP is a fake frontface register 3410 */ 3411 static boolean 3412 emit_frontface(struct svga_shader_emitter *emit) 3413 { 3414 struct src_register vface; 3415 SVGA3dShaderDestToken temp; 3416 struct src_register pass, fail; 3417 3418 vface = get_vface( emit ); 3419 3420 /* Can't use get_temp() to allocate the fake frontface reg as such 3421 * temporaries will be reclaimed after each instruction by the call 3422 * to reset_temp_regs(). 
3423 */ 3424 temp = dst_register( SVGA3DREG_TEMP, 3425 emit->nr_hw_temp++ ); 3426 3427 if (emit->key.fs.front_ccw) { 3428 pass = get_zero_immediate(emit); 3429 fail = get_one_immediate(emit); 3430 } else { 3431 pass = get_one_immediate(emit); 3432 fail = get_zero_immediate(emit); 3433 } 3434 3435 if (!emit_conditional(emit, PIPE_FUNC_GREATER, 3436 temp, vface, get_zero_immediate(emit), 3437 pass, fail)) 3438 return FALSE; 3439 3440 /* Reassign the input_map to the actual front-face color: 3441 */ 3442 emit->input_map[emit->internal_frontface_idx] = src(temp); 3443 3444 return TRUE; 3445 } 3446 3447 3448 /** 3449 * Emit code to invert the T component of the incoming texture coordinate. 3450 * This is used for drawing point sprites when 3451 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT. 3452 */ 3453 static boolean 3454 emit_inverted_texcoords(struct svga_shader_emitter *emit) 3455 { 3456 unsigned inverted_texcoords = emit->inverted_texcoords; 3457 3458 while (inverted_texcoords) { 3459 const unsigned unit = ffs(inverted_texcoords) - 1; 3460 3461 assert(emit->inverted_texcoords & (1 << unit)); 3462 3463 assert(unit < ARRAY_SIZE(emit->ps_true_texcoord)); 3464 3465 assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input)); 3466 3467 assert(emit->ps_inverted_texcoord_input[unit] 3468 < ARRAY_SIZE(emit->input_map)); 3469 3470 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */ 3471 if (!submit_op3(emit, 3472 inst_token(SVGA3DOP_MAD), 3473 dst(emit->ps_inverted_texcoord[unit]), 3474 emit->ps_true_texcoord[unit], 3475 get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f), 3476 get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f))) 3477 return FALSE; 3478 3479 /* Reassign the input_map entry to the new texcoord register */ 3480 emit->input_map[emit->ps_inverted_texcoord_input[unit]] = 3481 emit->ps_inverted_texcoord[unit]; 3482 3483 inverted_texcoords &= ~(1 << unit); 3484 } 3485 3486 return TRUE; 3487 } 3488 3489 3490 /** 3491 * Emit code to adjust vertex shader inputs/attributes: 3492 * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs). 3493 * - Set attrib W component = 1. 3494 */ 3495 static boolean 3496 emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) 3497 { 3498 unsigned adjust_mask = (emit->key.vs.adjust_attrib_range | 3499 emit->key.vs.adjust_attrib_w_1); 3500 3501 while (adjust_mask) { 3502 /* Adjust vertex attrib range and/or set W component = 1 */ 3503 const unsigned index = u_bit_scan(&adjust_mask); 3504 struct src_register tmp; 3505 3506 /* allocate a temp reg */ 3507 tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); 3508 emit->nr_hw_temp++; 3509 3510 if (emit->key.vs.adjust_attrib_range & (1 << index)) { 3511 /* The vertex input/attribute is supposed to be a signed value in 3512 * the range [-1,1] but we actually fetched/converted it to the 3513 * range [0,1]. This most likely happens when the app specifies a 3514 * signed byte attribute but we interpreted it as unsigned bytes. 3515 * See also svga_translate_vertex_format(). 3516 * 3517 * Here, we emit some extra instructions to adjust 3518 * the attribute values from [0,1] to [-1,1]. 3519 * 3520 * The adjustment we implement is: 3521 * new_attrib = attrib * 2.0; 3522 * if (attrib >= 0.5) 3523 * new_attrib = new_attrib - 2.0; 3524 * This isn't exactly right (it's off by a bit or so) but close enough. 
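          *
          * As a rough worked example (the numbers are illustrative only): a
          * signed byte -64 is fetched as 192/255 ~= 0.753; doubling gives
          * 1.506 and, since 0.753 >= 0.5, subtracting 2.0 yields -0.494,
          * close to the exact value -64/127 ~= -0.504.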
3525 */ 3526 SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0); 3527 3528 /* tmp = attrib * 2.0 */ 3529 if (!submit_op2(emit, 3530 inst_token(SVGA3DOP_MUL), 3531 dst(tmp), 3532 emit->input_map[index], 3533 get_two_immediate(emit))) 3534 return FALSE; 3535 3536 /* pred = (attrib >= 0.5) */ 3537 if (!submit_op2(emit, 3538 inst_token_setp(SVGA3DOPCOMP_GE), 3539 pred_reg, 3540 emit->input_map[index], /* vert attrib */ 3541 get_half_immediate(emit))) /* 0.5 */ 3542 return FALSE; 3543 3544 /* sub(pred) tmp, tmp, 2.0 */ 3545 if (!submit_op3(emit, 3546 inst_token_predicated(SVGA3DOP_SUB), 3547 dst(tmp), 3548 src(pred_reg), 3549 tmp, 3550 get_two_immediate(emit))) 3551 return FALSE; 3552 } 3553 else { 3554 /* just copy the vertex input attrib to the temp register */ 3555 if (!submit_op1(emit, 3556 inst_token(SVGA3DOP_MOV), 3557 dst(tmp), 3558 emit->input_map[index])) 3559 return FALSE; 3560 } 3561 3562 if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) { 3563 /* move 1 into W position of tmp */ 3564 if (!submit_op1(emit, 3565 inst_token(SVGA3DOP_MOV), 3566 writemask(dst(tmp), TGSI_WRITEMASK_W), 3567 get_one_immediate(emit))) 3568 return FALSE; 3569 } 3570 3571 /* Reassign the input_map entry to the new tmp register */ 3572 emit->input_map[index] = tmp; 3573 } 3574 3575 return TRUE; 3576 } 3577 3578 3579 /** 3580 * Determine if we need to create the "common" immediate value which is 3581 * used for generating useful vector constants such as {0,0,0,0} and 3582 * {1,1,1,1}. 3583 * We could just do this all the time except that we want to conserve 3584 * registers whenever possible. 3585 */ 3586 static boolean 3587 needs_to_create_common_immediate(const struct svga_shader_emitter *emit) 3588 { 3589 unsigned i; 3590 3591 if (emit->unit == PIPE_SHADER_FRAGMENT) { 3592 if (emit->key.fs.light_twoside) 3593 return TRUE; 3594 3595 if (emit->key.fs.white_fragments) 3596 return TRUE; 3597 3598 if (emit->emit_frontface) 3599 return TRUE; 3600 3601 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 || 3602 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 || 3603 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1) 3604 return TRUE; 3605 3606 if (emit->inverted_texcoords) 3607 return TRUE; 3608 3609 /* look for any PIPE_SWIZZLE_0/ONE terms */ 3610 for (i = 0; i < emit->key.num_textures; i++) { 3611 if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W || 3612 emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W || 3613 emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W || 3614 emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W) 3615 return TRUE; 3616 } 3617 3618 for (i = 0; i < emit->key.num_textures; i++) { 3619 if (emit->key.tex[i].compare_mode 3620 == PIPE_TEX_COMPARE_R_TO_TEXTURE) 3621 return TRUE; 3622 } 3623 } 3624 else if (emit->unit == PIPE_SHADER_VERTEX) { 3625 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) 3626 return TRUE; 3627 if (emit->key.vs.adjust_attrib_range || 3628 emit->key.vs.adjust_attrib_w_1) 3629 return TRUE; 3630 } 3631 3632 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || 3633 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || 3634 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || 3635 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || 3636 emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 || 3637 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || 3638 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || 3639 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || 3640 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 || 3641 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 || 3642 emit->info.opcode_count[TGSI_OPCODE_SEQ] 
>= 1 || 3643 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 || 3644 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 || 3645 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 || 3646 emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1) 3647 return TRUE; 3648 3649 return FALSE; 3650 } 3651 3652 3653 /** 3654 * Do we need to create a looping constant? 3655 */ 3656 static boolean 3657 needs_to_create_loop_const(const struct svga_shader_emitter *emit) 3658 { 3659 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1); 3660 } 3661 3662 3663 static boolean 3664 needs_to_create_arl_consts(const struct svga_shader_emitter *emit) 3665 { 3666 return (emit->num_arl_consts > 0); 3667 } 3668 3669 3670 static boolean 3671 pre_parse_add_indirect( struct svga_shader_emitter *emit, 3672 int num, int current_arl) 3673 { 3674 unsigned i; 3675 assert(num < 0); 3676 3677 for (i = 0; i < emit->num_arl_consts; ++i) { 3678 if (emit->arl_consts[i].arl_num == current_arl) 3679 break; 3680 } 3681 /* new entry */ 3682 if (emit->num_arl_consts == i) { 3683 ++emit->num_arl_consts; 3684 } 3685 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ? 3686 num : 3687 emit->arl_consts[i].number; 3688 emit->arl_consts[i].arl_num = current_arl; 3689 return TRUE; 3690 } 3691 3692 3693 static boolean 3694 pre_parse_instruction( struct svga_shader_emitter *emit, 3695 const struct tgsi_full_instruction *insn, 3696 int current_arl) 3697 { 3698 if (insn->Src[0].Register.Indirect && 3699 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) { 3700 const struct tgsi_full_src_register *reg = &insn->Src[0]; 3701 if (reg->Register.Index < 0) { 3702 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3703 } 3704 } 3705 3706 if (insn->Src[1].Register.Indirect && 3707 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) { 3708 const struct tgsi_full_src_register *reg = &insn->Src[1]; 3709 if (reg->Register.Index < 0) { 3710 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3711 } 3712 } 3713 3714 if (insn->Src[2].Register.Indirect && 3715 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) { 3716 const struct tgsi_full_src_register *reg = &insn->Src[2]; 3717 if (reg->Register.Index < 0) { 3718 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3719 } 3720 } 3721 3722 return TRUE; 3723 } 3724 3725 3726 static boolean 3727 pre_parse_tokens( struct svga_shader_emitter *emit, 3728 const struct tgsi_token *tokens ) 3729 { 3730 struct tgsi_parse_context parse; 3731 int current_arl = 0; 3732 3733 tgsi_parse_init( &parse, tokens ); 3734 3735 while (!tgsi_parse_end_of_tokens( &parse )) { 3736 tgsi_parse_token( &parse ); 3737 switch (parse.FullToken.Token.Type) { 3738 case TGSI_TOKEN_TYPE_IMMEDIATE: 3739 case TGSI_TOKEN_TYPE_DECLARATION: 3740 break; 3741 case TGSI_TOKEN_TYPE_INSTRUCTION: 3742 if (parse.FullToken.FullInstruction.Instruction.Opcode == 3743 TGSI_OPCODE_ARL) { 3744 ++current_arl; 3745 } 3746 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction, 3747 current_arl )) 3748 return FALSE; 3749 break; 3750 default: 3751 break; 3752 } 3753 3754 } 3755 return TRUE; 3756 } 3757 3758 3759 static boolean 3760 svga_shader_emit_helpers(struct svga_shader_emitter *emit) 3761 { 3762 if (needs_to_create_common_immediate( emit )) { 3763 create_common_immediate( emit ); 3764 } 3765 if (needs_to_create_loop_const( emit )) { 3766 create_loop_const( emit ); 3767 } 3768 if (needs_to_create_arl_consts( emit )) { 3769 create_arl_consts( emit ); 3770 } 3771 3772 if (emit->unit == PIPE_SHADER_FRAGMENT) { 3773 if 
(!svga_shader_emit_samplers_decl( emit ))
         return FALSE;

      if (!emit_ps_preamble( emit ))
         return FALSE;

      if (emit->key.fs.light_twoside) {
         if (!emit_light_twoside( emit ))
            return FALSE;
      }
      if (emit->emit_frontface) {
         if (!emit_frontface( emit ))
            return FALSE;
      }
      if (emit->inverted_texcoords) {
         if (!emit_inverted_texcoords( emit ))
            return FALSE;
      }
   }
   else {
      assert(emit->unit == PIPE_SHADER_VERTEX);
      /* Run the attribute fixup if either adjustment is requested; the
       * helper handles both the range and the W=1 adjustments.
       */
      if (emit->key.vs.adjust_attrib_range ||
          emit->key.vs.adjust_attrib_w_1) {
         if (!emit_adjusted_vertex_attribs(emit))
            return FALSE;
      }
   }

   return TRUE;
}


/**
 * This is the main entrypoint into the TGSI instruction translator.
 * Translate TGSI shader tokens into an SVGA shader.
 */
boolean
svga_shader_emit_instructions(struct svga_shader_emitter *emit,
                              const struct tgsi_token *tokens)
{
   struct tgsi_parse_context parse;
   const struct tgsi_token *new_tokens = NULL;
   boolean ret = TRUE;
   boolean helpers_emitted = FALSE;
   unsigned line_nr = 0;

   if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) {
      unsigned unit;

      new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
                                                        TGSI_FILE_INPUT);

      if (new_tokens) {
         /* Setup texture state for stipple */
         emit->sampler_target[unit] = TGSI_TEXTURE_2D;
         emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
         emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
         emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
         emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;

         emit->pstipple_sampler_unit = unit;

         tokens = new_tokens;
      }
   }

   tgsi_parse_init( &parse, tokens );
   emit->internal_imm_count = 0;

   if (emit->unit == PIPE_SHADER_VERTEX) {
      ret = emit_vs_preamble( emit );
      if (!ret)
         goto done;
   }

   pre_parse_tokens(emit, tokens);

   while (!tgsi_parse_end_of_tokens( &parse )) {
      tgsi_parse_token( &parse );

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE:
         ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_DECLARATION:
         ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         if (!helpers_emitted) {
            ret = svga_shader_emit_helpers( emit );
            if (!ret)
               goto done;
            helpers_emitted = TRUE;
         }
         ret = svga_emit_instruction( emit,
                                      line_nr++,
                                      &parse.FullToken.FullInstruction );
         if (!ret)
            goto done;
         break;
      default:
         break;
      }

      reset_temp_regs( emit );
   }

   /* Need to terminate the current subroutine.  Note that the hardware
    * doesn't tolerate shaders whose subroutines don't terminate with
    * RET+END.
    */
   if (!emit->in_main_func) {
      ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
      if (!ret)
         goto done;
   }

   assert(emit->dynamic_branching_level == 0);

   /* Need to terminate the whole shader:
    */
   ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
   if (!ret)
      goto done;

done:
   tgsi_parse_free( &parse );
   if (new_tokens) {
      tgsi_free_tokens(new_tokens);
   }

   return ret;
}