1 /********************************************************** 2 * Copyright 2008-2009 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 * 24 **********************************************************/ 25 26 27 #include "pipe/p_shader_tokens.h" 28 #include "tgsi/tgsi_dump.h" 29 #include "tgsi/tgsi_parse.h" 30 #include "util/u_memory.h" 31 #include "util/u_math.h" 32 #include "util/u_pstipple.h" 33 34 #include "svga_tgsi_emit.h" 35 #include "svga_context.h" 36 37 38 static boolean emit_vs_postamble( struct svga_shader_emitter *emit ); 39 static boolean emit_ps_postamble( struct svga_shader_emitter *emit ); 40 41 42 static unsigned 43 translate_opcode(uint opcode) 44 { 45 switch (opcode) { 46 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; 47 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; 48 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; 49 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; 50 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; 51 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX; 52 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; 53 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV; 54 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; 55 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; 56 default: 57 assert(!"svga: unexpected opcode in translate_opcode()"); 58 return SVGA3DOP_LAST_INST; 59 } 60 } 61 62 63 static unsigned 64 translate_file(unsigned file) 65 { 66 switch (file) { 67 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP; 68 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT; 69 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */ 70 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST; 71 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST; 72 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER; 73 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR; 74 default: 75 assert(!"svga: unexpected register file in translate_file()"); 76 return SVGA3DREG_TEMP; 77 } 78 } 79 80 81 /** 82 * Translate a TGSI destination register to an SVGA3DShaderDestToken. 83 * \param insn the TGSI instruction 84 * \param idx which TGSI dest register to translate (usually (always?) 
zero) 85 */ 86 static SVGA3dShaderDestToken 87 translate_dst_register( struct svga_shader_emitter *emit, 88 const struct tgsi_full_instruction *insn, 89 unsigned idx ) 90 { 91 const struct tgsi_full_dst_register *reg = &insn->Dst[idx]; 92 SVGA3dShaderDestToken dest; 93 94 switch (reg->Register.File) { 95 case TGSI_FILE_OUTPUT: 96 /* Output registers encode semantic information in their name. 97 * Need to lookup a table built at decl time: 98 */ 99 dest = emit->output_map[reg->Register.Index]; 100 emit->num_output_writes++; 101 break; 102 103 default: 104 { 105 unsigned index = reg->Register.Index; 106 assert(index < SVGA3D_TEMPREG_MAX); 107 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1); 108 dest = dst_register(translate_file(reg->Register.File), index); 109 } 110 break; 111 } 112 113 if (reg->Register.Indirect) { 114 debug_warning("Indirect indexing of dest registers is not supported!\n"); 115 } 116 117 dest.mask = reg->Register.WriteMask; 118 assert(dest.mask); 119 120 if (insn->Instruction.Saturate) 121 dest.dstMod = SVGA3DDSTMOD_SATURATE; 122 123 return dest; 124 } 125 126 127 /** 128 * Apply a swizzle to a src_register, returning a new src_register 129 * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y) 130 * would return SRC.YYZZ 131 */ 132 static struct src_register 133 swizzle(struct src_register src, 134 unsigned x, unsigned y, unsigned z, unsigned w) 135 { 136 assert(x < 4); 137 assert(y < 4); 138 assert(z < 4); 139 assert(w < 4); 140 x = (src.base.swizzle >> (x * 2)) & 0x3; 141 y = (src.base.swizzle >> (y * 2)) & 0x3; 142 z = (src.base.swizzle >> (z * 2)) & 0x3; 143 w = (src.base.swizzle >> (w * 2)) & 0x3; 144 145 src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w); 146 147 return src; 148 } 149 150 151 /** 152 * Apply a "scalar" swizzle to a src_register returning a new 153 * src_register where all the swizzle terms are the same. 
154 * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ 155 */ 156 static struct src_register 157 scalar(struct src_register src, unsigned comp) 158 { 159 assert(comp < 4); 160 return swizzle( src, comp, comp, comp, comp ); 161 } 162 163 164 static boolean 165 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) 166 { 167 unsigned i; 168 169 for (i = 0; i < emit->num_arl_consts; ++i) { 170 if (emit->arl_consts[i].arl_num == emit->current_arl) 171 return TRUE; 172 } 173 return FALSE; 174 } 175 176 177 static int 178 svga_arl_adjustment( const struct svga_shader_emitter *emit ) 179 { 180 unsigned i; 181 182 for (i = 0; i < emit->num_arl_consts; ++i) { 183 if (emit->arl_consts[i].arl_num == emit->current_arl) 184 return emit->arl_consts[i].number; 185 } 186 return 0; 187 } 188 189 190 /** 191 * Translate a TGSI src register to a src_register. 192 */ 193 static struct src_register 194 translate_src_register( const struct svga_shader_emitter *emit, 195 const struct tgsi_full_src_register *reg ) 196 { 197 struct src_register src; 198 199 switch (reg->Register.File) { 200 case TGSI_FILE_INPUT: 201 /* Input registers are referred to by their semantic name rather 202 * than by index. Use the mapping build up from the decls: 203 */ 204 src = emit->input_map[reg->Register.Index]; 205 break; 206 207 case TGSI_FILE_IMMEDIATE: 208 /* Immediates are appended after TGSI constants in the D3D 209 * constant buffer. 210 */ 211 src = src_register( translate_file( reg->Register.File ), 212 reg->Register.Index + emit->imm_start ); 213 break; 214 215 default: 216 src = src_register( translate_file( reg->Register.File ), 217 reg->Register.Index ); 218 break; 219 } 220 221 /* Indirect addressing. 222 */ 223 if (reg->Register.Indirect) { 224 if (emit->unit == PIPE_SHADER_FRAGMENT) { 225 /* Pixel shaders have only loop registers for relative 226 * addressing into inputs. Ignore the redundant address 227 * register, the contents of aL should be in sync with it. 
228 */ 229 if (reg->Register.File == TGSI_FILE_INPUT) { 230 src.base.relAddr = 1; 231 src.indirect = src_token(SVGA3DREG_LOOP, 0); 232 } 233 } 234 else { 235 /* Constant buffers only. 236 */ 237 if (reg->Register.File == TGSI_FILE_CONSTANT) { 238 /* we shift the offset towards the minimum */ 239 if (svga_arl_needs_adjustment( emit )) { 240 src.base.num -= svga_arl_adjustment( emit ); 241 } 242 src.base.relAddr = 1; 243 244 /* Not really sure what should go in the second token: 245 */ 246 src.indirect = src_token( SVGA3DREG_ADDR, 247 reg->Indirect.Index ); 248 249 src.indirect.swizzle = SWIZZLE_XXXX; 250 } 251 } 252 } 253 254 src = swizzle( src, 255 reg->Register.SwizzleX, 256 reg->Register.SwizzleY, 257 reg->Register.SwizzleZ, 258 reg->Register.SwizzleW ); 259 260 /* src.mod isn't a bitfield, unfortunately: 261 * See tgsi_util_get_full_src_register_sign_mode for implementation details. 262 */ 263 if (reg->Register.Absolute) { 264 if (reg->Register.Negate) 265 src.base.srcMod = SVGA3DSRCMOD_ABSNEG; 266 else 267 src.base.srcMod = SVGA3DSRCMOD_ABS; 268 } 269 else { 270 if (reg->Register.Negate) 271 src.base.srcMod = SVGA3DSRCMOD_NEG; 272 else 273 src.base.srcMod = SVGA3DSRCMOD_NONE; 274 } 275 276 return src; 277 } 278 279 280 /* 281 * Get a temporary register. 282 * Note: if we exceed the temporary register limit we just use 283 * register SVGA3D_TEMPREG_MAX - 1. 284 */ 285 static SVGA3dShaderDestToken 286 get_temp( struct svga_shader_emitter *emit ) 287 { 288 int i = emit->nr_hw_temp + emit->internal_temp_count++; 289 if (i >= SVGA3D_TEMPREG_MAX) { 290 debug_warn_once("svga: Too many temporary registers used in shader\n"); 291 i = SVGA3D_TEMPREG_MAX - 1; 292 } 293 return dst_register( SVGA3DREG_TEMP, i ); 294 } 295 296 297 /** 298 * Release a single temp. Currently only effective if it was the last 299 * allocated temp, otherwise release will be delayed until the next 300 * call to reset_temp_regs(). 
301 */ 302 static void 303 release_temp( struct svga_shader_emitter *emit, 304 SVGA3dShaderDestToken temp ) 305 { 306 if (temp.num == emit->internal_temp_count - 1) 307 emit->internal_temp_count--; 308 } 309 310 311 /** 312 * Release all temps. 313 */ 314 static void 315 reset_temp_regs(struct svga_shader_emitter *emit) 316 { 317 emit->internal_temp_count = 0; 318 } 319 320 321 /** Emit bytecode for a src_register */ 322 static boolean 323 emit_src(struct svga_shader_emitter *emit, const struct src_register src) 324 { 325 if (src.base.relAddr) { 326 assert(src.base.reserved0); 327 assert(src.indirect.reserved0); 328 return (svga_shader_emit_dword( emit, src.base.value ) && 329 svga_shader_emit_dword( emit, src.indirect.value )); 330 } 331 else { 332 assert(src.base.reserved0); 333 return svga_shader_emit_dword( emit, src.base.value ); 334 } 335 } 336 337 338 /** Emit bytecode for a dst_register */ 339 static boolean 340 emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest) 341 { 342 assert(dest.reserved0); 343 assert(dest.mask); 344 return svga_shader_emit_dword( emit, dest.value ); 345 } 346 347 348 /** Emit bytecode for a 1-operand instruction */ 349 static boolean 350 emit_op1(struct svga_shader_emitter *emit, 351 SVGA3dShaderInstToken inst, 352 SVGA3dShaderDestToken dest, 353 struct src_register src0) 354 { 355 return (emit_instruction(emit, inst) && 356 emit_dst(emit, dest) && 357 emit_src(emit, src0)); 358 } 359 360 361 /** Emit bytecode for a 2-operand instruction */ 362 static boolean 363 emit_op2(struct svga_shader_emitter *emit, 364 SVGA3dShaderInstToken inst, 365 SVGA3dShaderDestToken dest, 366 struct src_register src0, 367 struct src_register src1) 368 { 369 return (emit_instruction(emit, inst) && 370 emit_dst(emit, dest) && 371 emit_src(emit, src0) && 372 emit_src(emit, src1)); 373 } 374 375 376 /** Emit bytecode for a 3-operand instruction */ 377 static boolean 378 emit_op3(struct svga_shader_emitter *emit, 379 SVGA3dShaderInstToken 
inst, 380 SVGA3dShaderDestToken dest, 381 struct src_register src0, 382 struct src_register src1, 383 struct src_register src2) 384 { 385 return (emit_instruction(emit, inst) && 386 emit_dst(emit, dest) && 387 emit_src(emit, src0) && 388 emit_src(emit, src1) && 389 emit_src(emit, src2)); 390 } 391 392 393 /** Emit bytecode for a 4-operand instruction */ 394 static boolean 395 emit_op4(struct svga_shader_emitter *emit, 396 SVGA3dShaderInstToken inst, 397 SVGA3dShaderDestToken dest, 398 struct src_register src0, 399 struct src_register src1, 400 struct src_register src2, 401 struct src_register src3) 402 { 403 return (emit_instruction(emit, inst) && 404 emit_dst(emit, dest) && 405 emit_src(emit, src0) && 406 emit_src(emit, src1) && 407 emit_src(emit, src2) && 408 emit_src(emit, src3)); 409 } 410 411 412 /** 413 * Apply the absolute value modifier to the given src_register, returning 414 * a new src_register. 415 */ 416 static struct src_register 417 absolute(struct src_register src) 418 { 419 src.base.srcMod = SVGA3DSRCMOD_ABS; 420 return src; 421 } 422 423 424 /** 425 * Apply the negation modifier to the given src_register, returning 426 * a new src_register. 427 */ 428 static struct src_register 429 negate(struct src_register src) 430 { 431 switch (src.base.srcMod) { 432 case SVGA3DSRCMOD_ABS: 433 src.base.srcMod = SVGA3DSRCMOD_ABSNEG; 434 break; 435 case SVGA3DSRCMOD_ABSNEG: 436 src.base.srcMod = SVGA3DSRCMOD_ABS; 437 break; 438 case SVGA3DSRCMOD_NEG: 439 src.base.srcMod = SVGA3DSRCMOD_NONE; 440 break; 441 case SVGA3DSRCMOD_NONE: 442 src.base.srcMod = SVGA3DSRCMOD_NEG; 443 break; 444 } 445 return src; 446 } 447 448 449 450 /* Replace the src with the temporary specified in the dst, but copying 451 * only the necessary channels, and preserving the original swizzle (which is 452 * important given that several opcodes have constraints in the allowed 453 * swizzles). 
454 */ 455 static boolean 456 emit_repl(struct svga_shader_emitter *emit, 457 SVGA3dShaderDestToken dst, 458 struct src_register *src0) 459 { 460 unsigned src0_swizzle; 461 unsigned chan; 462 463 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP); 464 465 src0_swizzle = src0->base.swizzle; 466 467 dst.mask = 0; 468 for (chan = 0; chan < 4; ++chan) { 469 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3; 470 dst.mask |= 1 << swizzle; 471 } 472 assert(dst.mask); 473 474 src0->base.swizzle = SVGA3DSWIZZLE_NONE; 475 476 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 )) 477 return FALSE; 478 479 *src0 = src( dst ); 480 src0->base.swizzle = src0_swizzle; 481 482 return TRUE; 483 } 484 485 486 /** 487 * Submit/emit an instruction with zero operands. 488 */ 489 static boolean 490 submit_op0(struct svga_shader_emitter *emit, 491 SVGA3dShaderInstToken inst, 492 SVGA3dShaderDestToken dest) 493 { 494 return (emit_instruction( emit, inst ) && 495 emit_dst( emit, dest )); 496 } 497 498 499 /** 500 * Submit/emit an instruction with one operand. 501 */ 502 static boolean 503 submit_op1(struct svga_shader_emitter *emit, 504 SVGA3dShaderInstToken inst, 505 SVGA3dShaderDestToken dest, 506 struct src_register src0) 507 { 508 return emit_op1( emit, inst, dest, src0 ); 509 } 510 511 512 /** 513 * Submit/emit an instruction with two operands. 514 * 515 * SVGA shaders may not refer to >1 constant register in a single 516 * instruction. This function checks for that usage and inserts a 517 * move to temporary if detected. 518 * 519 * The same applies to input registers -- at most a single input 520 * register may be read by any instruction. 
521 */ 522 static boolean 523 submit_op2(struct svga_shader_emitter *emit, 524 SVGA3dShaderInstToken inst, 525 SVGA3dShaderDestToken dest, 526 struct src_register src0, 527 struct src_register src1) 528 { 529 SVGA3dShaderDestToken temp; 530 SVGA3dShaderRegType type0, type1; 531 boolean need_temp = FALSE; 532 533 temp.value = 0; 534 type0 = SVGA3dShaderGetRegType( src0.base.value ); 535 type1 = SVGA3dShaderGetRegType( src1.base.value ); 536 537 if (type0 == SVGA3DREG_CONST && 538 type1 == SVGA3DREG_CONST && 539 src0.base.num != src1.base.num) 540 need_temp = TRUE; 541 542 if (type0 == SVGA3DREG_INPUT && 543 type1 == SVGA3DREG_INPUT && 544 src0.base.num != src1.base.num) 545 need_temp = TRUE; 546 547 if (need_temp) { 548 temp = get_temp( emit ); 549 550 if (!emit_repl( emit, temp, &src0 )) 551 return FALSE; 552 } 553 554 if (!emit_op2( emit, inst, dest, src0, src1 )) 555 return FALSE; 556 557 if (need_temp) 558 release_temp( emit, temp ); 559 560 return TRUE; 561 } 562 563 564 /** 565 * Submit/emit an instruction with three operands. 566 * 567 * SVGA shaders may not refer to >1 constant register in a single 568 * instruction. This function checks for that usage and inserts a 569 * move to temporary if detected. 
570 */ 571 static boolean 572 submit_op3(struct svga_shader_emitter *emit, 573 SVGA3dShaderInstToken inst, 574 SVGA3dShaderDestToken dest, 575 struct src_register src0, 576 struct src_register src1, 577 struct src_register src2) 578 { 579 SVGA3dShaderDestToken temp0; 580 SVGA3dShaderDestToken temp1; 581 boolean need_temp0 = FALSE; 582 boolean need_temp1 = FALSE; 583 SVGA3dShaderRegType type0, type1, type2; 584 585 temp0.value = 0; 586 temp1.value = 0; 587 type0 = SVGA3dShaderGetRegType( src0.base.value ); 588 type1 = SVGA3dShaderGetRegType( src1.base.value ); 589 type2 = SVGA3dShaderGetRegType( src2.base.value ); 590 591 if (inst.op != SVGA3DOP_SINCOS) { 592 if (type0 == SVGA3DREG_CONST && 593 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) || 594 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) 595 need_temp0 = TRUE; 596 597 if (type1 == SVGA3DREG_CONST && 598 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num)) 599 need_temp1 = TRUE; 600 } 601 602 if (type0 == SVGA3DREG_INPUT && 603 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) || 604 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) 605 need_temp0 = TRUE; 606 607 if (type1 == SVGA3DREG_INPUT && 608 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num)) 609 need_temp1 = TRUE; 610 611 if (need_temp0) { 612 temp0 = get_temp( emit ); 613 614 if (!emit_repl( emit, temp0, &src0 )) 615 return FALSE; 616 } 617 618 if (need_temp1) { 619 temp1 = get_temp( emit ); 620 621 if (!emit_repl( emit, temp1, &src1 )) 622 return FALSE; 623 } 624 625 if (!emit_op3( emit, inst, dest, src0, src1, src2 )) 626 return FALSE; 627 628 if (need_temp1) 629 release_temp( emit, temp1 ); 630 if (need_temp0) 631 release_temp( emit, temp0 ); 632 return TRUE; 633 } 634 635 636 /** 637 * Submit/emit an instruction with four operands. 638 * 639 * SVGA shaders may not refer to >1 constant register in a single 640 * instruction. 
This function checks for that usage and inserts a 641 * move to temporary if detected. 642 */ 643 static boolean 644 submit_op4(struct svga_shader_emitter *emit, 645 SVGA3dShaderInstToken inst, 646 SVGA3dShaderDestToken dest, 647 struct src_register src0, 648 struct src_register src1, 649 struct src_register src2, 650 struct src_register src3) 651 { 652 SVGA3dShaderDestToken temp0; 653 SVGA3dShaderDestToken temp3; 654 boolean need_temp0 = FALSE; 655 boolean need_temp3 = FALSE; 656 SVGA3dShaderRegType type0, type1, type2, type3; 657 658 temp0.value = 0; 659 temp3.value = 0; 660 type0 = SVGA3dShaderGetRegType( src0.base.value ); 661 type1 = SVGA3dShaderGetRegType( src1.base.value ); 662 type2 = SVGA3dShaderGetRegType( src2.base.value ); 663 type3 = SVGA3dShaderGetRegType( src2.base.value ); 664 665 /* Make life a little easier - this is only used by the TXD 666 * instruction which is guaranteed not to have a constant/input reg 667 * in one slot at least: 668 */ 669 assert(type1 == SVGA3DREG_SAMPLER); 670 (void) type1; 671 672 if (type0 == SVGA3DREG_CONST && 673 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) || 674 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) 675 need_temp0 = TRUE; 676 677 if (type3 == SVGA3DREG_CONST && 678 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num)) 679 need_temp3 = TRUE; 680 681 if (type0 == SVGA3DREG_INPUT && 682 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) || 683 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) 684 need_temp0 = TRUE; 685 686 if (type3 == SVGA3DREG_INPUT && 687 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) 688 need_temp3 = TRUE; 689 690 if (need_temp0) { 691 temp0 = get_temp( emit ); 692 693 if (!emit_repl( emit, temp0, &src0 )) 694 return FALSE; 695 } 696 697 if (need_temp3) { 698 temp3 = get_temp( emit ); 699 700 if (!emit_repl( emit, temp3, &src3 )) 701 return FALSE; 702 } 703 704 if (!emit_op4( emit, inst, dest, src0, src1, src2, 
src3 )) 705 return FALSE; 706 707 if (need_temp3) 708 release_temp( emit, temp3 ); 709 if (need_temp0) 710 release_temp( emit, temp0 ); 711 return TRUE; 712 } 713 714 715 /** 716 * Do the src and dest registers refer to the same register? 717 */ 718 static boolean 719 alias_src_dst(struct src_register src, 720 SVGA3dShaderDestToken dst) 721 { 722 if (src.base.num != dst.num) 723 return FALSE; 724 725 if (SVGA3dShaderGetRegType(dst.value) != 726 SVGA3dShaderGetRegType(src.base.value)) 727 return FALSE; 728 729 return TRUE; 730 } 731 732 733 /** 734 * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I] 735 * instructions. 736 */ 737 static boolean 738 emit_def_const(struct svga_shader_emitter *emit, 739 SVGA3dShaderConstType type, 740 unsigned idx, float a, float b, float c, float d) 741 { 742 SVGA3DOpDefArgs def; 743 SVGA3dShaderInstToken opcode; 744 745 switch (type) { 746 case SVGA3D_CONST_TYPE_FLOAT: 747 opcode = inst_token( SVGA3DOP_DEF ); 748 def.dst = dst_register( SVGA3DREG_CONST, idx ); 749 def.constValues[0] = a; 750 def.constValues[1] = b; 751 def.constValues[2] = c; 752 def.constValues[3] = d; 753 break; 754 case SVGA3D_CONST_TYPE_INT: 755 opcode = inst_token( SVGA3DOP_DEFI ); 756 def.dst = dst_register( SVGA3DREG_CONSTINT, idx ); 757 def.constIValues[0] = (int)a; 758 def.constIValues[1] = (int)b; 759 def.constIValues[2] = (int)c; 760 def.constIValues[3] = (int)d; 761 break; 762 default: 763 assert(0); 764 opcode = inst_token( SVGA3DOP_NOP ); 765 break; 766 } 767 768 if (!emit_instruction(emit, opcode) || 769 !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values))) 770 return FALSE; 771 772 return TRUE; 773 } 774 775 776 static boolean 777 create_loop_const( struct svga_shader_emitter *emit ) 778 { 779 unsigned idx = emit->nr_hw_int_const++; 780 781 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx, 782 255, /* iteration count */ 783 0, /* initial value */ 784 1, /* step size */ 785 0 /* not used, must be 0 */)) 786 return 
FALSE; 787 788 emit->loop_const_idx = idx; 789 emit->created_loop_const = TRUE; 790 791 return TRUE; 792 } 793 794 static boolean 795 create_arl_consts( struct svga_shader_emitter *emit ) 796 { 797 int i; 798 799 for (i = 0; i < emit->num_arl_consts; i += 4) { 800 int j; 801 unsigned idx = emit->nr_hw_float_const++; 802 float vals[4]; 803 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) { 804 vals[j] = (float) emit->arl_consts[i + j].number; 805 emit->arl_consts[i + j].idx = idx; 806 switch (j) { 807 case 0: 808 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X; 809 break; 810 case 1: 811 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y; 812 break; 813 case 2: 814 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z; 815 break; 816 case 3: 817 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W; 818 break; 819 } 820 } 821 while (j < 4) 822 vals[j++] = 0; 823 824 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 825 vals[0], vals[1], 826 vals[2], vals[3])) 827 return FALSE; 828 } 829 830 return TRUE; 831 } 832 833 834 /** 835 * Return the register which holds the pixel shaders front/back- 836 * facing value. 837 */ 838 static struct src_register 839 get_vface( struct svga_shader_emitter *emit ) 840 { 841 assert(emit->emitted_vface); 842 return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE); 843 } 844 845 846 /** 847 * Create/emit a "common" constant with values {0, 0.5, -1, 1}. 848 * We can swizzle this to produce other useful constants such as 849 * {0, 0, 0, 0}, {1, 1, 1, 1}, etc. 850 */ 851 static boolean 852 create_common_immediate( struct svga_shader_emitter *emit ) 853 { 854 unsigned idx = emit->nr_hw_float_const++; 855 856 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate 857 * other useful vectors. 
858 */ 859 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 860 idx, 0.0f, 0.5f, -1.0f, 1.0f )) 861 return FALSE; 862 emit->common_immediate_idx[0] = idx; 863 idx++; 864 865 /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */ 866 if (emit->key.vs.adjust_attrib_range) { 867 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 868 idx, 2.0f, 0.0f, 0.0f, 0.0f )) 869 return FALSE; 870 emit->common_immediate_idx[1] = idx; 871 } 872 else { 873 emit->common_immediate_idx[1] = -1; 874 } 875 876 emit->created_common_immediate = TRUE; 877 878 return TRUE; 879 } 880 881 882 /** 883 * Return swizzle/position for the given value in the "common" immediate. 884 */ 885 static inline unsigned 886 common_immediate_swizzle(float value) 887 { 888 if (value == 0.0f) 889 return TGSI_SWIZZLE_X; 890 else if (value == 0.5f) 891 return TGSI_SWIZZLE_Y; 892 else if (value == -1.0f) 893 return TGSI_SWIZZLE_Z; 894 else if (value == 1.0f) 895 return TGSI_SWIZZLE_W; 896 else { 897 assert(!"illegal value in common_immediate_swizzle"); 898 return TGSI_SWIZZLE_X; 899 } 900 } 901 902 903 /** 904 * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5 905 */ 906 static struct src_register 907 get_immediate(struct svga_shader_emitter *emit, 908 float x, float y, float z, float w) 909 { 910 unsigned sx = common_immediate_swizzle(x); 911 unsigned sy = common_immediate_swizzle(y); 912 unsigned sz = common_immediate_swizzle(z); 913 unsigned sw = common_immediate_swizzle(w); 914 assert(emit->created_common_immediate); 915 assert(emit->common_immediate_idx[0] >= 0); 916 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 917 sx, sy, sz, sw); 918 } 919 920 921 /** 922 * returns {0, 0, 0, 0} immediate 923 */ 924 static struct src_register 925 get_zero_immediate( struct svga_shader_emitter *emit ) 926 { 927 assert(emit->created_common_immediate); 928 assert(emit->common_immediate_idx[0] >= 0); 929 return swizzle(src_register( SVGA3DREG_CONST, 930 
emit->common_immediate_idx[0]), 931 0, 0, 0, 0); 932 } 933 934 935 /** 936 * returns {1, 1, 1, 1} immediate 937 */ 938 static struct src_register 939 get_one_immediate( struct svga_shader_emitter *emit ) 940 { 941 assert(emit->created_common_immediate); 942 assert(emit->common_immediate_idx[0] >= 0); 943 return swizzle(src_register( SVGA3DREG_CONST, 944 emit->common_immediate_idx[0]), 945 3, 3, 3, 3); 946 } 947 948 949 /** 950 * returns {0.5, 0.5, 0.5, 0.5} immediate 951 */ 952 static struct src_register 953 get_half_immediate( struct svga_shader_emitter *emit ) 954 { 955 assert(emit->created_common_immediate); 956 assert(emit->common_immediate_idx[0] >= 0); 957 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 958 1, 1, 1, 1); 959 } 960 961 962 /** 963 * returns {2, 2, 2, 2} immediate 964 */ 965 static struct src_register 966 get_two_immediate( struct svga_shader_emitter *emit ) 967 { 968 /* Note we use the second common immediate here */ 969 assert(emit->created_common_immediate); 970 assert(emit->common_immediate_idx[1] >= 0); 971 return swizzle(src_register( SVGA3DREG_CONST, 972 emit->common_immediate_idx[1]), 973 0, 0, 0, 0); 974 } 975 976 977 /** 978 * returns the loop const 979 */ 980 static struct src_register 981 get_loop_const( struct svga_shader_emitter *emit ) 982 { 983 assert(emit->created_loop_const); 984 assert(emit->loop_const_idx >= 0); 985 return src_register( SVGA3DREG_CONSTINT, 986 emit->loop_const_idx ); 987 } 988 989 990 static struct src_register 991 get_fake_arl_const( struct svga_shader_emitter *emit ) 992 { 993 struct src_register reg; 994 int idx = 0, swizzle = 0, i; 995 996 for (i = 0; i < emit->num_arl_consts; ++ i) { 997 if (emit->arl_consts[i].arl_num == emit->current_arl) { 998 idx = emit->arl_consts[i].idx; 999 swizzle = emit->arl_consts[i].swizzle; 1000 } 1001 } 1002 1003 reg = src_register( SVGA3DREG_CONST, idx ); 1004 return scalar(reg, swizzle); 1005 } 1006 1007 1008 /** 1009 * Return a register which 
holds the width and height of the texture 1010 * currently bound to the given sampler. 1011 */ 1012 static struct src_register 1013 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) 1014 { 1015 int idx; 1016 struct src_register reg; 1017 1018 /* the width/height indexes start right after constants */ 1019 idx = emit->key.tex[sampler_num].width_height_idx + 1020 emit->info.file_max[TGSI_FILE_CONSTANT] + 1; 1021 1022 reg = src_register( SVGA3DREG_CONST, idx ); 1023 return reg; 1024 } 1025 1026 1027 static boolean 1028 emit_fake_arl(struct svga_shader_emitter *emit, 1029 const struct tgsi_full_instruction *insn) 1030 { 1031 const struct src_register src0 = 1032 translate_src_register(emit, &insn->Src[0] ); 1033 struct src_register src1 = get_fake_arl_const( emit ); 1034 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1035 SVGA3dShaderDestToken tmp = get_temp( emit ); 1036 1037 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) 1038 return FALSE; 1039 1040 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ), 1041 src1)) 1042 return FALSE; 1043 1044 /* replicate the original swizzle */ 1045 src1 = src(tmp); 1046 src1.base.swizzle = src0.base.swizzle; 1047 1048 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ), 1049 dst, src1 ); 1050 } 1051 1052 1053 static boolean 1054 emit_if(struct svga_shader_emitter *emit, 1055 const struct tgsi_full_instruction *insn) 1056 { 1057 struct src_register src0 = 1058 translate_src_register(emit, &insn->Src[0]); 1059 struct src_register zero = get_zero_immediate(emit); 1060 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); 1061 1062 if_token.control = SVGA3DOPCOMPC_NE; 1063 1064 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) { 1065 /* 1066 * Max different constant registers readable per IFC instruction is 1. 
1067 */ 1068 SVGA3dShaderDestToken tmp = get_temp( emit ); 1069 1070 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) 1071 return FALSE; 1072 1073 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X); 1074 } 1075 1076 emit->dynamic_branching_level++; 1077 1078 return (emit_instruction( emit, if_token ) && 1079 emit_src( emit, src0 ) && 1080 emit_src( emit, zero ) ); 1081 } 1082 1083 1084 static boolean 1085 emit_else(struct svga_shader_emitter *emit, 1086 const struct tgsi_full_instruction *insn) 1087 { 1088 return emit_instruction(emit, inst_token(SVGA3DOP_ELSE)); 1089 } 1090 1091 1092 static boolean 1093 emit_endif(struct svga_shader_emitter *emit, 1094 const struct tgsi_full_instruction *insn) 1095 { 1096 emit->dynamic_branching_level--; 1097 1098 return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF)); 1099 } 1100 1101 1102 /** 1103 * Translate the following TGSI FLR instruction. 1104 * FLR DST, SRC 1105 * To the following SVGA3D instruction sequence. 1106 * FRC TMP, SRC 1107 * SUB DST, SRC, TMP 1108 */ 1109 static boolean 1110 emit_floor(struct svga_shader_emitter *emit, 1111 const struct tgsi_full_instruction *insn ) 1112 { 1113 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1114 const struct src_register src0 = 1115 translate_src_register(emit, &insn->Src[0] ); 1116 SVGA3dShaderDestToken temp = get_temp( emit ); 1117 1118 /* FRC TMP, SRC */ 1119 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 )) 1120 return FALSE; 1121 1122 /* SUB DST, SRC, TMP */ 1123 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0, 1124 negate( src( temp ) ) )) 1125 return FALSE; 1126 1127 return TRUE; 1128 } 1129 1130 1131 /** 1132 * Translate the following TGSI CEIL instruction. 1133 * CEIL DST, SRC 1134 * To the following SVGA3D instruction sequence. 
 *   FRC  TMP, -SRC
 *   ADD  DST, SRC, TMP
 */
static boolean
emit_ceil(struct svga_shader_emitter *emit,
          const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
   const struct src_register src0 =
      translate_src_register(emit, &insn->Src[0]);
   SVGA3dShaderDestToken temp = get_temp(emit);

   /* FRC TMP, -SRC
    * frac(-x) == ceil(x) - x, so adding it back to SRC below yields ceil.
    */
   if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0)))
      return FALSE;

   /* ADD DST, SRC, TMP */
   if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp)))
      return FALSE;

   return TRUE;
}


/**
 * Translate the following TGSI DIV instruction.
 *   DIV  DST.xy, SRC0, SRC1
 * To the following SVGA3D instruction sequence.
 *   RCP  TMP.x, SRC1.xxxx
 *   RCP  TMP.y, SRC1.yyyy
 *   MUL  DST.xy, SRC0, TMP
 */
static boolean
emit_div(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn )
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   const struct src_register src0 =
      translate_src_register(emit, &insn->Src[0] );
   const struct src_register src1 =
      translate_src_register(emit, &insn->Src[1] );
   SVGA3dShaderDestToken temp = get_temp( emit );
   unsigned i;

   /* For each enabled element, perform a RCP instruction.  Note that
    * RCP is scalar in SVGA3D, so one instruction per written channel:
    */
   for (i = 0; i < 4; i++) {
      unsigned channel = 1 << i;
      if (dst.mask & channel) {
         /* RCP  TMP.?, SRC1.???? */
         if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
                          writemask(temp, channel),
                          scalar(src1, i) ))
            return FALSE;
      }
   }

   /* Vector mul:
    * MUL  DST, SRC0, TMP
    */
   if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
                    src( temp ) ))
      return FALSE;

   return TRUE;
}


/**
 * Translate the following TGSI DP2 instruction.
 *   DP2  DST, SRC1, SRC2
 * To the following SVGA3D instruction sequence.
 *   MUL  TMP, SRC1, SRC2
 *   ADD  DST, TMP.xxxx, TMP.yyyy
 */
static boolean
emit_dp2(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn )
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   const struct src_register src0 =
      translate_src_register(emit, &insn->Src[0]);
   const struct src_register src1 =
      translate_src_register(emit, &insn->Src[1]);
   SVGA3dShaderDestToken temp = get_temp( emit );
   struct src_register temp_src0, temp_src1;

   /* MUL TMP, SRC1, SRC2 -- componentwise product */
   if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
      return FALSE;

   temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
   temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);

   /* ADD DST, TMP.xxxx, TMP.yyyy -- sum the first two products */
   if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
                    temp_src0, temp_src1 ))
      return FALSE;

   return TRUE;
}


/**
 * Sine / Cosine helper function.
1241 */ 1242 static boolean 1243 do_emit_sincos(struct svga_shader_emitter *emit, 1244 SVGA3dShaderDestToken dst, 1245 struct src_register src0) 1246 { 1247 src0 = scalar(src0, TGSI_SWIZZLE_X); 1248 return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0); 1249 } 1250 1251 1252 /** 1253 * Translate TGSI SIN instruction into: 1254 * SCS TMP SRC 1255 * MOV DST TMP.yyyy 1256 */ 1257 static boolean 1258 emit_sin(struct svga_shader_emitter *emit, 1259 const struct tgsi_full_instruction *insn ) 1260 { 1261 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1262 struct src_register src0 = 1263 translate_src_register(emit, &insn->Src[0] ); 1264 SVGA3dShaderDestToken temp = get_temp( emit ); 1265 1266 /* SCS TMP SRC */ 1267 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0)) 1268 return FALSE; 1269 1270 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y); 1271 1272 /* MOV DST TMP.yyyy */ 1273 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) 1274 return FALSE; 1275 1276 return TRUE; 1277 } 1278 1279 1280 /* 1281 * Translate TGSI COS instruction into: 1282 * SCS TMP SRC 1283 * MOV DST TMP.xxxx 1284 */ 1285 static boolean 1286 emit_cos(struct svga_shader_emitter *emit, 1287 const struct tgsi_full_instruction *insn) 1288 { 1289 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1290 struct src_register src0 = 1291 translate_src_register(emit, &insn->Src[0] ); 1292 SVGA3dShaderDestToken temp = get_temp( emit ); 1293 1294 /* SCS TMP SRC */ 1295 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 )) 1296 return FALSE; 1297 1298 src0 = scalar(src( temp ), TGSI_SWIZZLE_X); 1299 1300 /* MOV DST TMP.xxxx */ 1301 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) 1302 return FALSE; 1303 1304 return TRUE; 1305 } 1306 1307 1308 /** 1309 * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction. 
1310 */ 1311 static boolean 1312 emit_ssg(struct svga_shader_emitter *emit, 1313 const struct tgsi_full_instruction *insn) 1314 { 1315 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1316 struct src_register src0 = 1317 translate_src_register(emit, &insn->Src[0] ); 1318 SVGA3dShaderDestToken temp0 = get_temp( emit ); 1319 SVGA3dShaderDestToken temp1 = get_temp( emit ); 1320 struct src_register zero, one; 1321 1322 if (emit->unit == PIPE_SHADER_VERTEX) { 1323 /* SGN DST, SRC0, TMP0, TMP1 */ 1324 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0, 1325 src( temp0 ), src( temp1 ) ); 1326 } 1327 1328 one = get_one_immediate(emit); 1329 zero = get_zero_immediate(emit); 1330 1331 /* CMP TMP0, SRC0, one, zero */ 1332 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), 1333 writemask( temp0, dst.mask ), src0, one, zero )) 1334 return FALSE; 1335 1336 /* CMP TMP1, negate(SRC0), negate(one), zero */ 1337 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), 1338 writemask( temp1, dst.mask ), negate( src0 ), negate( one ), 1339 zero )) 1340 return FALSE; 1341 1342 /* ADD DST, TMP0, TMP1 */ 1343 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ), 1344 src( temp1 ) ); 1345 } 1346 1347 1348 /** 1349 * Translate/emit KILL_IF instruction (kill if any of X,Y,Z,W are negative). 
1350 */ 1351 static boolean 1352 emit_kill_if(struct svga_shader_emitter *emit, 1353 const struct tgsi_full_instruction *insn) 1354 { 1355 const struct tgsi_full_src_register *reg = &insn->Src[0]; 1356 struct src_register src0, srcIn; 1357 const boolean special = (reg->Register.Absolute || 1358 reg->Register.Negate || 1359 reg->Register.Indirect || 1360 reg->Register.SwizzleX != 0 || 1361 reg->Register.SwizzleY != 1 || 1362 reg->Register.SwizzleZ != 2 || 1363 reg->Register.File != TGSI_FILE_TEMPORARY); 1364 SVGA3dShaderDestToken temp; 1365 1366 src0 = srcIn = translate_src_register( emit, reg ); 1367 1368 if (special) { 1369 /* need a temp reg */ 1370 temp = get_temp( emit ); 1371 } 1372 1373 if (special) { 1374 /* move the source into a temp register */ 1375 submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, src0); 1376 1377 src0 = src( temp ); 1378 } 1379 1380 /* Do the texkill by checking if any of the XYZW components are < 0. 1381 * Note that ps_2_0 and later take XYZW in consideration, while ps_1_x 1382 * only used XYZ. The MSDN documentation about this is incorrect. 1383 */ 1384 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) )) 1385 return FALSE; 1386 1387 return TRUE; 1388 } 1389 1390 1391 /** 1392 * Translate/emit unconditional kill instruction (usually found inside 1393 * an IF/ELSE/ENDIF block). 
1394 */ 1395 static boolean 1396 emit_kill(struct svga_shader_emitter *emit, 1397 const struct tgsi_full_instruction *insn) 1398 { 1399 SVGA3dShaderDestToken temp; 1400 struct src_register one = get_one_immediate(emit); 1401 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL ); 1402 1403 /* texkill doesn't allow negation on the operand so lets move 1404 * negation of {1} to a temp register */ 1405 temp = get_temp( emit ); 1406 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, 1407 negate( one ) )) 1408 return FALSE; 1409 1410 return submit_op0( emit, inst, temp ); 1411 } 1412 1413 1414 /** 1415 * Test if r1 and r2 are the same register. 1416 */ 1417 static boolean 1418 same_register(struct src_register r1, struct src_register r2) 1419 { 1420 return (r1.base.num == r2.base.num && 1421 r1.base.type_upper == r2.base.type_upper && 1422 r1.base.type_lower == r2.base.type_lower); 1423 } 1424 1425 1426 1427 /** 1428 * Implement conditionals by initializing destination reg to 'fail', 1429 * then set predicate reg with UFOP_SETP, then move 'pass' to dest 1430 * based on predicate reg. 1431 * 1432 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems. 
1433 * MOV dst, fail 1434 * MOV dst, pass, p0 1435 */ 1436 static boolean 1437 emit_conditional(struct svga_shader_emitter *emit, 1438 unsigned compare_func, 1439 SVGA3dShaderDestToken dst, 1440 struct src_register src0, 1441 struct src_register src1, 1442 struct src_register pass, 1443 struct src_register fail) 1444 { 1445 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); 1446 SVGA3dShaderInstToken setp_token; 1447 1448 switch (compare_func) { 1449 case PIPE_FUNC_NEVER: 1450 return submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1451 dst, fail ); 1452 break; 1453 case PIPE_FUNC_LESS: 1454 setp_token = inst_token_setp(SVGA3DOPCOMP_LT); 1455 break; 1456 case PIPE_FUNC_EQUAL: 1457 setp_token = inst_token_setp(SVGA3DOPCOMP_EQ); 1458 break; 1459 case PIPE_FUNC_LEQUAL: 1460 setp_token = inst_token_setp(SVGA3DOPCOMP_LE); 1461 break; 1462 case PIPE_FUNC_GREATER: 1463 setp_token = inst_token_setp(SVGA3DOPCOMP_GT); 1464 break; 1465 case PIPE_FUNC_NOTEQUAL: 1466 setp_token = inst_token_setp(SVGA3DOPCOMPC_NE); 1467 break; 1468 case PIPE_FUNC_GEQUAL: 1469 setp_token = inst_token_setp(SVGA3DOPCOMP_GE); 1470 break; 1471 case PIPE_FUNC_ALWAYS: 1472 return submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1473 dst, pass ); 1474 break; 1475 } 1476 1477 if (same_register(src(dst), pass)) { 1478 /* We'll get bad results if the dst and pass registers are the same 1479 * so use a temp register containing pass. 1480 */ 1481 SVGA3dShaderDestToken temp = get_temp(emit); 1482 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass)) 1483 return FALSE; 1484 pass = src(temp); 1485 } 1486 1487 /* SETP src0, COMPOP, src1 */ 1488 if (!submit_op2( emit, setp_token, pred_reg, 1489 src0, src1 )) 1490 return FALSE; 1491 1492 /* MOV dst, fail */ 1493 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail)) 1494 return FALSE; 1495 1496 /* MOV dst, pass (predicated) 1497 * 1498 * Note that the predicate reg (and possible modifiers) is passed 1499 * as the first source argument. 
1500 */ 1501 if (!submit_op2(emit, 1502 inst_token_predicated(SVGA3DOP_MOV), dst, 1503 src(pred_reg), pass)) 1504 return FALSE; 1505 1506 return TRUE; 1507 } 1508 1509 1510 /** 1511 * Helper for emiting 'selection' commands. Basically: 1512 * if (src0 OP src1) 1513 * dst = 1.0; 1514 * else 1515 * dst = 0.0; 1516 */ 1517 static boolean 1518 emit_select(struct svga_shader_emitter *emit, 1519 unsigned compare_func, 1520 SVGA3dShaderDestToken dst, 1521 struct src_register src0, 1522 struct src_register src1 ) 1523 { 1524 /* There are some SVGA instructions which implement some selects 1525 * directly, but they are only available in the vertex shader. 1526 */ 1527 if (emit->unit == PIPE_SHADER_VERTEX) { 1528 switch (compare_func) { 1529 case PIPE_FUNC_GEQUAL: 1530 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 ); 1531 case PIPE_FUNC_LEQUAL: 1532 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 ); 1533 case PIPE_FUNC_GREATER: 1534 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 ); 1535 case PIPE_FUNC_LESS: 1536 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 ); 1537 default: 1538 break; 1539 } 1540 } 1541 1542 /* Otherwise, need to use the setp approach: 1543 */ 1544 { 1545 struct src_register one, zero; 1546 /* zero immediate is 0,0,0,1 */ 1547 zero = get_zero_immediate(emit); 1548 one = get_one_immediate(emit); 1549 1550 return emit_conditional(emit, compare_func, dst, src0, src1, one, zero); 1551 } 1552 } 1553 1554 1555 /** 1556 * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction. 
1557 */ 1558 static boolean 1559 emit_select_op(struct svga_shader_emitter *emit, 1560 unsigned compare, 1561 const struct tgsi_full_instruction *insn) 1562 { 1563 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1564 struct src_register src0 = translate_src_register( 1565 emit, &insn->Src[0] ); 1566 struct src_register src1 = translate_src_register( 1567 emit, &insn->Src[1] ); 1568 1569 return emit_select( emit, compare, dst, src0, src1 ); 1570 } 1571 1572 1573 /** 1574 * Translate TGSI CMP instruction. Component-wise: 1575 * dst = (src0 < 0.0) ? src1 : src2 1576 */ 1577 static boolean 1578 emit_cmp(struct svga_shader_emitter *emit, 1579 const struct tgsi_full_instruction *insn) 1580 { 1581 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1582 const struct src_register src0 = 1583 translate_src_register(emit, &insn->Src[0] ); 1584 const struct src_register src1 = 1585 translate_src_register(emit, &insn->Src[1] ); 1586 const struct src_register src2 = 1587 translate_src_register(emit, &insn->Src[2] ); 1588 1589 if (emit->unit == PIPE_SHADER_VERTEX) { 1590 struct src_register zero = get_zero_immediate(emit); 1591 /* We used to simulate CMP with SLT+LRP. But that didn't work when 1592 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed 1593 * because it involves a CMP to handle the 0 case. 1594 * Use a conditional expression instead. 1595 */ 1596 return emit_conditional(emit, PIPE_FUNC_LESS, dst, 1597 src0, zero, src1, src2); 1598 } 1599 else { 1600 assert(emit->unit == PIPE_SHADER_FRAGMENT); 1601 1602 /* CMP DST, SRC0, SRC2, SRC1 */ 1603 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, 1604 src0, src2, src1); 1605 } 1606 } 1607 1608 1609 /** 1610 * Translate/emit 2-operand (coord, sampler) texture instructions. 
 */
static boolean
emit_tex2(struct svga_shader_emitter *emit,
          const struct tgsi_full_instruction *insn,
          SVGA3dShaderDestToken dst)
{
   SVGA3dShaderInstToken inst;
   struct src_register texcoord;
   struct src_register sampler;
   SVGA3dShaderDestToken tmp;

   inst.value = 0;

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      inst.op = SVGA3DOP_TEX;
      break;
   case TGSI_OPCODE_TXP:
      inst.op = SVGA3DOP_TEX;
      inst.control = SVGA3DOPCONT_PROJECT;
      break;
   case TGSI_OPCODE_TXB:
      inst.op = SVGA3DOP_TEX;
      inst.control = SVGA3DOPCONT_BIAS;
      break;
   case TGSI_OPCODE_TXL:
      inst.op = SVGA3DOP_TEXLDL;
      break;
   default:
      assert(0);
      return FALSE;
   }

   texcoord = translate_src_register( emit, &insn->Src[0] );
   sampler = translate_src_register( emit, &insn->Src[1] );

   /* tmp is only allocated (and only used) when one of these conditions
    * holds; both use sites below are guarded by the same conditions.
    */
   if (emit->key.tex[sampler.base.num].unnormalized ||
       emit->dynamic_branching_level > 0)
      tmp = get_temp( emit );

   /* Can't do mipmapping inside dynamic branch constructs.  Force LOD
    * zero in that case.
    */
   if (emit->dynamic_branching_level > 0 &&
       inst.op == SVGA3DOP_TEX &&
       SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
      struct src_register zero = get_zero_immediate(emit);

      /* MOV tmp, texcoord */
      if (!submit_op1( emit,
                       inst_token( SVGA3DOP_MOV ),
                       tmp,
                       texcoord ))
         return FALSE;

      /* MOV tmp.w, zero -- LOD 0 for the TEXLDL below */
      if (!submit_op1( emit,
                       inst_token( SVGA3DOP_MOV ),
                       writemask( tmp, TGSI_WRITEMASK_W ),
                       zero ))
         return FALSE;

      texcoord = src( tmp );
      inst.op = SVGA3DOP_TEXLDL;
   }

   /* Explicit normalization of texcoords:
    */
   if (emit->key.tex[sampler.base.num].unnormalized) {
      struct src_register wh = get_tex_dimensions( emit, sampler.base.num );

      /* MUL  tmp, SRC0, WH */
      if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
                       tmp, texcoord, wh ))
         return FALSE;

      texcoord = src( tmp );
   }

   return submit_op2( emit, inst, dst, texcoord, sampler );
}


/**
 * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions.
 */
static boolean
emit_tex4(struct svga_shader_emitter *emit,
          const struct tgsi_full_instruction *insn,
          SVGA3dShaderDestToken dst )
{
   SVGA3dShaderInstToken inst;
   struct src_register texcoord;
   struct src_register ddx;
   struct src_register ddy;
   struct src_register sampler;

   texcoord = translate_src_register( emit, &insn->Src[0] );
   ddx      = translate_src_register( emit, &insn->Src[1] );
   ddy      = translate_src_register( emit, &insn->Src[2] );
   sampler  = translate_src_register( emit, &insn->Src[3] );

   inst.value = 0;

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_TXD:
      inst.op = SVGA3DOP_TEXLDD; /* 4 args! */
      break;
   default:
      assert(0);
      return FALSE;
   }

   /* Note the operand order: TEXLDD takes (coord, sampler, ddx, ddy) */
   return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
}


/**
 * Emit texture swizzle code.  We do this here since SVGA samplers don't
 * directly support swizzles.
 */
static boolean
emit_tex_swizzle(struct svga_shader_emitter *emit,
                 SVGA3dShaderDestToken dst,
                 struct src_register src,
                 unsigned swizzle_x,
                 unsigned swizzle_y,
                 unsigned swizzle_z,
                 unsigned swizzle_w)
{
   const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
   unsigned srcSwizzle[4];
   unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
   unsigned i;

   /* build writemasks and srcSwizzle terms: channels selecting constant
    * 0 or 1 get an identity swizzle and are collected into separate masks
    * so they can be written with immediate MOVs below.
    */
   for (i = 0; i < 4; i++) {
      if (swizzleIn[i] == PIPE_SWIZZLE_0) {
         srcSwizzle[i] = TGSI_SWIZZLE_X + i;
         zeroWritemask |= (1 << i);
      }
      else if (swizzleIn[i] == PIPE_SWIZZLE_1) {
         srcSwizzle[i] = TGSI_SWIZZLE_X + i;
         oneWritemask |= (1 << i);
      }
      else {
         srcSwizzle[i] = swizzleIn[i];
         srcWritemask |= (1 << i);
      }
   }

   /* write x/y/z/w comps */
   if (dst.mask & srcWritemask) {
      if (!submit_op1(emit,
                      inst_token(SVGA3DOP_MOV),
                      writemask(dst, srcWritemask),
                      swizzle(src,
                              srcSwizzle[0],
                              srcSwizzle[1],
                              srcSwizzle[2],
                              srcSwizzle[3])))
         return FALSE;
   }

   /* write 0 comps */
   if (dst.mask & zeroWritemask) {
      if (!submit_op1(emit,
                      inst_token(SVGA3DOP_MOV),
                      writemask(dst, zeroWritemask),
                      get_zero_immediate(emit)))
         return FALSE;
   }

   /* write 1 comps */
   if (dst.mask & oneWritemask) {
      if (!submit_op1(emit,
                      inst_token(SVGA3DOP_MOV),
                      writemask(dst, oneWritemask),
                      get_one_immediate(emit)))
         return FALSE;
   }

   return TRUE;
}


/**
 * Translate/emit a TGSI texture sample instruction.
1799 */ 1800 static boolean 1801 emit_tex(struct svga_shader_emitter *emit, 1802 const struct tgsi_full_instruction *insn) 1803 { 1804 SVGA3dShaderDestToken dst = 1805 translate_dst_register( emit, insn, 0 ); 1806 struct src_register src0 = 1807 translate_src_register( emit, &insn->Src[0] ); 1808 struct src_register src1 = 1809 translate_src_register( emit, &insn->Src[1] ); 1810 1811 SVGA3dShaderDestToken tex_result; 1812 const unsigned unit = src1.base.num; 1813 1814 /* check for shadow samplers */ 1815 boolean compare = (emit->key.tex[unit].compare_mode == 1816 PIPE_TEX_COMPARE_R_TO_TEXTURE); 1817 1818 /* texture swizzle */ 1819 boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X || 1820 emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y || 1821 emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z || 1822 emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W); 1823 1824 boolean saturate = insn->Instruction.Saturate; 1825 1826 /* If doing compare processing or tex swizzle or saturation, we need to put 1827 * the fetched color into a temporary so it can be used as a source later on. 1828 */ 1829 if (compare || swizzle || saturate) { 1830 tex_result = get_temp( emit ); 1831 } 1832 else { 1833 tex_result = dst; 1834 } 1835 1836 switch(insn->Instruction.Opcode) { 1837 case TGSI_OPCODE_TEX: 1838 case TGSI_OPCODE_TXB: 1839 case TGSI_OPCODE_TXP: 1840 case TGSI_OPCODE_TXL: 1841 if (!emit_tex2( emit, insn, tex_result )) 1842 return FALSE; 1843 break; 1844 case TGSI_OPCODE_TXD: 1845 if (!emit_tex4( emit, insn, tex_result )) 1846 return FALSE; 1847 break; 1848 default: 1849 assert(0); 1850 } 1851 1852 if (compare) { 1853 SVGA3dShaderDestToken dst2; 1854 1855 if (swizzle || saturate) 1856 dst2 = tex_result; 1857 else 1858 dst2 = dst; 1859 1860 if (dst.mask & TGSI_WRITEMASK_XYZ) { 1861 SVGA3dShaderDestToken src0_zdivw = get_temp( emit ); 1862 /* When sampling a depth texture, the result of the comparison is in 1863 * the Y component. 
1864 */ 1865 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y); 1866 struct src_register r_coord; 1867 1868 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) { 1869 /* Divide texcoord R by Q */ 1870 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), 1871 writemask(src0_zdivw, TGSI_WRITEMASK_X), 1872 scalar(src0, TGSI_SWIZZLE_W) )) 1873 return FALSE; 1874 1875 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 1876 writemask(src0_zdivw, TGSI_WRITEMASK_X), 1877 scalar(src0, TGSI_SWIZZLE_Z), 1878 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) )) 1879 return FALSE; 1880 1881 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X); 1882 } 1883 else { 1884 r_coord = scalar(src0, TGSI_SWIZZLE_Z); 1885 } 1886 1887 /* Compare texture sample value against R component of texcoord */ 1888 if (!emit_select(emit, 1889 emit->key.tex[unit].compare_func, 1890 writemask( dst2, TGSI_WRITEMASK_XYZ ), 1891 r_coord, 1892 tex_src_x)) 1893 return FALSE; 1894 } 1895 1896 if (dst.mask & TGSI_WRITEMASK_W) { 1897 struct src_register one = get_one_immediate(emit); 1898 1899 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1900 writemask( dst2, TGSI_WRITEMASK_W ), 1901 one )) 1902 return FALSE; 1903 } 1904 } 1905 1906 if (saturate && !swizzle) { 1907 /* MOV_SAT real_dst, dst */ 1908 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) )) 1909 return FALSE; 1910 } 1911 else if (swizzle) { 1912 /* swizzle from tex_result to dst (handles saturation too, if any) */ 1913 emit_tex_swizzle(emit, 1914 dst, src(tex_result), 1915 emit->key.tex[unit].swizzle_r, 1916 emit->key.tex[unit].swizzle_g, 1917 emit->key.tex[unit].swizzle_b, 1918 emit->key.tex[unit].swizzle_a); 1919 } 1920 1921 return TRUE; 1922 } 1923 1924 1925 static boolean 1926 emit_bgnloop(struct svga_shader_emitter *emit, 1927 const struct tgsi_full_instruction *insn) 1928 { 1929 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP ); 1930 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 ); 1931 
struct src_register const_int = get_loop_const( emit ); 1932 1933 emit->dynamic_branching_level++; 1934 1935 return (emit_instruction( emit, inst ) && 1936 emit_src( emit, loop_reg ) && 1937 emit_src( emit, const_int ) ); 1938 } 1939 1940 1941 static boolean 1942 emit_endloop(struct svga_shader_emitter *emit, 1943 const struct tgsi_full_instruction *insn) 1944 { 1945 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP ); 1946 1947 emit->dynamic_branching_level--; 1948 1949 return emit_instruction( emit, inst ); 1950 } 1951 1952 1953 /** 1954 * Translate/emit TGSI BREAK (out of loop) instruction. 1955 */ 1956 static boolean 1957 emit_brk(struct svga_shader_emitter *emit, 1958 const struct tgsi_full_instruction *insn) 1959 { 1960 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK ); 1961 return emit_instruction( emit, inst ); 1962 } 1963 1964 1965 /** 1966 * Emit simple instruction which operates on one scalar value (not 1967 * a vector). Ex: LG2, RCP, RSQ. 1968 */ 1969 static boolean 1970 emit_scalar_op1(struct svga_shader_emitter *emit, 1971 unsigned opcode, 1972 const struct tgsi_full_instruction *insn) 1973 { 1974 SVGA3dShaderInstToken inst; 1975 SVGA3dShaderDestToken dst; 1976 struct src_register src; 1977 1978 inst = inst_token( opcode ); 1979 dst = translate_dst_register( emit, insn, 0 ); 1980 src = translate_src_register( emit, &insn->Src[0] ); 1981 src = scalar( src, TGSI_SWIZZLE_X ); 1982 1983 return submit_op1( emit, inst, dst, src ); 1984 } 1985 1986 1987 /** 1988 * Translate/emit a simple instruction (one which has no special-case 1989 * code) such as ADD, MUL, MIN, MAX. 
1990 */ 1991 static boolean 1992 emit_simple_instruction(struct svga_shader_emitter *emit, 1993 unsigned opcode, 1994 const struct tgsi_full_instruction *insn) 1995 { 1996 const struct tgsi_full_src_register *src = insn->Src; 1997 SVGA3dShaderInstToken inst; 1998 SVGA3dShaderDestToken dst; 1999 2000 inst = inst_token( opcode ); 2001 dst = translate_dst_register( emit, insn, 0 ); 2002 2003 switch (insn->Instruction.NumSrcRegs) { 2004 case 0: 2005 return submit_op0( emit, inst, dst ); 2006 case 1: 2007 return submit_op1( emit, inst, dst, 2008 translate_src_register( emit, &src[0] )); 2009 case 2: 2010 return submit_op2( emit, inst, dst, 2011 translate_src_register( emit, &src[0] ), 2012 translate_src_register( emit, &src[1] ) ); 2013 case 3: 2014 return submit_op3( emit, inst, dst, 2015 translate_src_register( emit, &src[0] ), 2016 translate_src_register( emit, &src[1] ), 2017 translate_src_register( emit, &src[2] ) ); 2018 default: 2019 assert(0); 2020 return FALSE; 2021 } 2022 } 2023 2024 2025 /** 2026 * TGSI_OPCODE_MOVE is only special-cased here to detect the 2027 * svga_fragment_shader::constant_color_output case. 2028 */ 2029 static boolean 2030 emit_mov(struct svga_shader_emitter *emit, 2031 const struct tgsi_full_instruction *insn) 2032 { 2033 const struct tgsi_full_src_register *src = &insn->Src[0]; 2034 const struct tgsi_full_dst_register *dst = &insn->Dst[0]; 2035 2036 if (emit->unit == PIPE_SHADER_FRAGMENT && 2037 dst->Register.File == TGSI_FILE_OUTPUT && 2038 dst->Register.Index == 0 && 2039 src->Register.File == TGSI_FILE_CONSTANT && 2040 !src->Register.Indirect) { 2041 emit->constant_color_output = TRUE; 2042 } 2043 2044 return emit_simple_instruction(emit, SVGA3DOP_MOV, insn); 2045 } 2046 2047 2048 /** 2049 * Translate/emit TGSI DDX, DDY instructions. 
 */
static boolean
emit_deriv(struct svga_shader_emitter *emit,
           const struct tgsi_full_instruction *insn )
{
   if (emit->dynamic_branching_level > 0 &&
       insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
   {
      SVGA3dShaderDestToken dst =
         translate_dst_register( emit, insn, 0 );

      /* Deriv opcodes not valid inside dynamic branching, workaround
       * by zeroing out the destination.
       */
      if (!submit_op1(emit,
                      inst_token( SVGA3DOP_MOV ),
                      dst,
                      get_zero_immediate(emit)))
         return FALSE;

      return TRUE;
   }
   else {
      unsigned opcode;
      const struct tgsi_full_src_register *reg = &insn->Src[0];
      SVGA3dShaderInstToken inst;
      SVGA3dShaderDestToken dst;
      struct src_register src0;

      switch (insn->Instruction.Opcode) {
      case TGSI_OPCODE_DDX:
         opcode = SVGA3DOP_DSX;
         break;
      case TGSI_OPCODE_DDY:
         opcode = SVGA3DOP_DSY;
         break;
      default:
         return FALSE;
      }

      inst = inst_token( opcode );
      dst = translate_dst_register( emit, insn, 0 );
      src0 = translate_src_register( emit, reg );

      /* We cannot use negate or abs on source to dsx/dsy instruction.
       * NOTE(review): emit_repl presumably copies src0 into 'temp' and
       * rewrites src0 to reference it -- confirm against its definition.
       */
      if (reg->Register.Absolute ||
          reg->Register.Negate) {
         SVGA3dShaderDestToken temp = get_temp( emit );

         if (!emit_repl( emit, temp, &src0 ))
            return FALSE;
      }

      return submit_op1( emit, inst, dst, src0 );
   }
}


/**
 * Translate/emit ARL (Address Register Load) instruction.  Used to
 * move a value into the special 'address' register.  Used to implement
 * indirect/variable indexing into arrays.
 */
static boolean
emit_arl(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   ++emit->current_arl;
   if (emit->unit == PIPE_SHADER_FRAGMENT) {
      /* MOVA not present in pixel shader instruction set.
       * Ignore this instruction altogether since it is
       * only used for loop counters -- and for that
       * we reference aL directly.
       */
      return TRUE;
   }
   if (svga_arl_needs_adjustment( emit )) {
      return emit_fake_arl( emit, insn );
   } else {
      /* no need to adjust, just emit straight arl */
      return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
   }
}


/**
 * Translate/emit TGSI POW instruction (scalar power function).
 */
static boolean
emit_pow(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   struct src_register src0 = translate_src_register(
      emit, &insn->Src[0] );
   struct src_register src1 = translate_src_register(
      emit, &insn->Src[1] );
   boolean need_tmp = FALSE;

   /* POW can only output to a temporary */
   if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
      need_tmp = TRUE;

   /* POW src1 must not be the same register as dst */
   if (alias_src_dst( src1, dst ))
      need_tmp = TRUE;

   /* it's a scalar op */
   src0 = scalar( src0, TGSI_SWIZZLE_X );
   src1 = scalar( src1, TGSI_SWIZZLE_X );

   if (need_tmp) {
      SVGA3dShaderDestToken tmp =
         writemask(get_temp( emit ), TGSI_WRITEMASK_X );

      if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
         return FALSE;

      /* copy the scalar result to the real destination */
      return submit_op1(emit, inst_token( SVGA3DOP_MOV ),
                        dst, scalar(src(tmp), 0) );
   }
   else {
      return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
   }
}


/**
 * Emit a LRP (linear interpolation) instruction.
2177 */ 2178 static boolean 2179 submit_lrp(struct svga_shader_emitter *emit, 2180 SVGA3dShaderDestToken dst, 2181 struct src_register src0, 2182 struct src_register src1, 2183 struct src_register src2) 2184 { 2185 SVGA3dShaderDestToken tmp; 2186 boolean need_dst_tmp = FALSE; 2187 2188 /* The dst reg must be a temporary, and not be the same as src0 or src2 */ 2189 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || 2190 alias_src_dst(src0, dst) || 2191 alias_src_dst(src2, dst)) 2192 need_dst_tmp = TRUE; 2193 2194 if (need_dst_tmp) { 2195 tmp = get_temp( emit ); 2196 tmp.mask = dst.mask; 2197 } 2198 else { 2199 tmp = dst; 2200 } 2201 2202 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) 2203 return FALSE; 2204 2205 if (need_dst_tmp) { 2206 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) 2207 return FALSE; 2208 } 2209 2210 return TRUE; 2211 } 2212 2213 2214 /** 2215 * Translate/emit LRP (Linear Interpolation) instruction. 2216 */ 2217 static boolean 2218 emit_lrp(struct svga_shader_emitter *emit, 2219 const struct tgsi_full_instruction *insn) 2220 { 2221 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2222 const struct src_register src0 = translate_src_register( 2223 emit, &insn->Src[0] ); 2224 const struct src_register src1 = translate_src_register( 2225 emit, &insn->Src[1] ); 2226 const struct src_register src2 = translate_src_register( 2227 emit, &insn->Src[2] ); 2228 2229 return submit_lrp(emit, dst, src0, src1, src2); 2230 } 2231 2232 /** 2233 * Translate/emit DST (Distance function) instruction. 
 */
static boolean
emit_dst_insn(struct svga_shader_emitter *emit,
              const struct tgsi_full_instruction *insn)
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
       */
      return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
   }
   else {
      /* Fragment shader: open-code the DST semantics:
       * result[0] = 1    * 1;
       * result[1] = a[1] * b[1];
       * result[2] = a[2] * 1;
       * result[3] = 1    * b[3];
       */
      SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
      SVGA3dShaderDestToken tmp;
      const struct src_register src0 = translate_src_register(
         emit, &insn->Src[0] );
      const struct src_register src1 = translate_src_register(
         emit, &insn->Src[1] );
      boolean need_tmp = FALSE;

      /* Need a temp if dst is not a temp register or aliases a source */
      if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
          alias_src_dst(src0, dst) ||
          alias_src_dst(src1, dst))
         need_tmp = TRUE;

      if (need_tmp) {
         tmp = get_temp( emit );
      }
      else {
         tmp = dst;
      }

      /* tmp.xw = 1.0
       */
      if (tmp.mask & TGSI_WRITEMASK_XW) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          writemask(tmp, TGSI_WRITEMASK_XW ),
                          get_one_immediate(emit)))
            return FALSE;
      }

      /* tmp.yz = src0
       */
      if (tmp.mask & TGSI_WRITEMASK_YZ) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          writemask(tmp, TGSI_WRITEMASK_YZ ),
                          src0))
            return FALSE;
      }

      /* tmp.yw = tmp * src1
       */
      if (tmp.mask & TGSI_WRITEMASK_YW) {
         if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
                          writemask(tmp, TGSI_WRITEMASK_YW ),
                          src(tmp),
                          src1))
            return FALSE;
      }

      /* dst = tmp
       */
      if (need_tmp) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          dst,
                          src(tmp)))
            return FALSE;
      }
   }

   return TRUE;
}


/**
 * Translate/emit TGSI EXP instruction:
 * dst.x = 2^floor(src.x), dst.y = src.x - floor(src.x),
 * dst.z = 2^src.x (partial precision), dst.w = 1.
 */
static boolean
emit_exp(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   struct src_register src0 =
      translate_src_register( emit, &insn->Src[0] );
   SVGA3dShaderDestToken fraction;

   /* The fraction lands in dst.y when Y is written; otherwise, if X is
    * written we still need it (to derive floor(src0)), so use a temp.
    */
   if (dst.mask & TGSI_WRITEMASK_Y)
      fraction = dst;
   else if (dst.mask & TGSI_WRITEMASK_X)
      fraction = get_temp( emit );
   else
      fraction.value = 0;

   /* If x or y is being written, fill fraction.y with src0 - floor(src0).
    */
   if (dst.mask & TGSI_WRITEMASK_XY) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
                       writemask( fraction, TGSI_WRITEMASK_Y ),
                       src0 ))
         return FALSE;
   }

   /* If x is being written, fill it with 2 ^ floor(src0).
    */
   if (dst.mask & TGSI_WRITEMASK_X) {
      /* floor(src0) == src0 - frac(src0) */
      if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
                       writemask( dst, TGSI_WRITEMASK_X ),
                       src0,
                       scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
         return FALSE;

      if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
                       writemask( dst, TGSI_WRITEMASK_X ),
                       scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
         return FALSE;

      /* the fraction temp was only allocated when Y is not written */
      if (!(dst.mask & TGSI_WRITEMASK_Y))
         release_temp( emit, fraction );
   }

   /* If z is being written, fill it with 2 ^ src0 (partial precision).
    */
   if (dst.mask & TGSI_WRITEMASK_Z) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
                       writemask( dst, TGSI_WRITEMASK_Z ),
                       src0 ) )
         return FALSE;
   }

   /* If w is being written, fill it with one.
    */
   if (dst.mask & TGSI_WRITEMASK_W) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                       writemask(dst, TGSI_WRITEMASK_W),
                       get_one_immediate(emit)))
         return FALSE;
   }

   return TRUE;
}


/**
 * Translate/emit LIT (Lighting helper) instruction.
 */
static boolean
emit_lit(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
       */
      return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
   }
   else {
      /* D3D vs. GL semantics can be fairly easily accomodated by
       * variations on this sequence.
       *
       * GL:
       *   tmp.y = src.x
       *   tmp.z = pow(src.y,src.w)
       *   p0 = src0.xxxx > 0
       *   result = zero.wxxw
       *   (p0) result.yz = tmp
       *
       * D3D:
       *   tmp.y = src.x
       *   tmp.z = pow(src.y,src.w)
       *   p0 = src0.xxyy > 0
       *   result = zero.wxxw
       *   (p0) result.yz = tmp
       *
       * Will implement the GL version for now.
       */
      SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
      SVGA3dShaderDestToken tmp = get_temp( emit );
      const struct src_register src0 = translate_src_register(
         emit, &insn->Src[0] );

      /* tmp = pow(src.y, src.w)
       */
      if (dst.mask & TGSI_WRITEMASK_Z) {
         if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
                         tmp,
                         scalar(src0, 1),
                         scalar(src0, 3)))
            return FALSE;
      }

      /* tmp.y = src.x
       */
      if (dst.mask & TGSI_WRITEMASK_Y) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          writemask(tmp, TGSI_WRITEMASK_Y ),
                          scalar(src0, 0)))
            return FALSE;
      }

      /* Can't quite do this with emit conditional due to the extra
       * writemask on the predicated mov:
       */
      {
         SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
         struct src_register predsrc;

         /* D3D vs GL semantics:
          */
         if (0)
            predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
         else
            predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */

         /* SETP src0.xxyy, GT, {0}.x */
         if (!submit_op2( emit,
                          inst_token_setp(SVGA3DOPCOMP_GT),
                          pred_reg,
                          predsrc,
                          get_zero_immediate(emit)))
            return FALSE;

         /* MOV dst, fail  (the value LIT produces when src.x <= 0) */
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
                          get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f)))
            return FALSE;

         /* MOV dst.yz, tmp (predicated)
          *
          * Note that the predicate reg (and possible modifiers) is passed
          * as the first source argument.
          */
         if (dst.mask & TGSI_WRITEMASK_YZ) {
            if (!submit_op2( emit,
                             inst_token_predicated(SVGA3DOP_MOV),
                             writemask(dst, TGSI_WRITEMASK_YZ),
                             src( pred_reg ), src( tmp ) ))
               return FALSE;
         }
      }
   }

   return TRUE;
}


/**
 * Translate/emit EX2 (2^x) instruction.  The hw EXP opcode is scalar,
 * so broadcast the .x result if the writemask needs more channels.
 */
static boolean
emit_ex2(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderInstToken inst;
   SVGA3dShaderDestToken dst;
   struct src_register src0;

   inst = inst_token( SVGA3DOP_EXP );
   dst = translate_dst_register( emit, insn, 0 );
   src0 = translate_src_register( emit, &insn->Src[0] );
   src0 = scalar( src0, TGSI_SWIZZLE_X );

   if (dst.mask != TGSI_WRITEMASK_XYZW) {
      /* Partial writemask: compute into a temp, then smear tmp.x */
      SVGA3dShaderDestToken tmp = get_temp( emit );

      if (!submit_op1( emit, inst, tmp, src0 ))
         return FALSE;

      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                         dst,
                         scalar( src( tmp ), TGSI_SWIZZLE_X ) );
   }

   return submit_op1( emit, inst, dst, src0 );
}


/**
 * Translate/emit the TGSI LOG instruction:
 *   x = floor(log2(|src|)), y = |src| / 2^floor(log2(|src|)),
 *   z = log2(|src|), w = 1
 */
static boolean
emit_log(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   struct src_register src0 =
      translate_src_register( emit, &insn->Src[0] );
   SVGA3dShaderDestToken abs_tmp;
   struct src_register abs_src0;
   SVGA3dShaderDestToken log2_abs;

   abs_tmp.value = 0;

   /* Choose where log2(|src0|) will live: dst.z if written, else a temp
    * if still needed for x/y, else nowhere.
    */
   if (dst.mask & TGSI_WRITEMASK_Z)
      log2_abs = dst;
   else if (dst.mask & TGSI_WRITEMASK_XY)
      log2_abs = get_temp( emit );
   else
      log2_abs.value = 0;

   /* If x, y or z is being written, fill log2_abs.z with
    * log2( abs( src0 ) ).
    */
   if (dst.mask & TGSI_WRITEMASK_XYZ) {
      /* An existing ABS modifier (or none) can take the absolute()
       * modifier directly; any other modifier must be flushed through
       * a MOV into a temp first.
       */
      if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
         abs_src0 = src0;
      else {
         abs_tmp = get_temp( emit );

         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          abs_tmp,
                          src0 ) )
            return FALSE;

         abs_src0 = src( abs_tmp );
      }

      abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );

      if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
                       writemask( log2_abs, TGSI_WRITEMASK_Z ),
                       abs_src0 ) )
         return FALSE;
   }

   if (dst.mask & TGSI_WRITEMASK_XY) {
      SVGA3dShaderDestToken floor_log2;

      if (dst.mask & TGSI_WRITEMASK_X)
         floor_log2 = dst;
      else
         floor_log2 = get_temp( emit );

      /* If x is being written, fill it with floor( log2( abs( src0 ) ) )
       * computed as  log2_abs.z - frac(log2_abs.z).
       */
      if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
                       writemask( floor_log2, TGSI_WRITEMASK_X ),
                       scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
         return FALSE;

      if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
                       writemask( floor_log2, TGSI_WRITEMASK_X ),
                       scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
                       negate( src( floor_log2 ) ) ) )
         return FALSE;

      /* If y is being written, fill it with
       * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
       */
      if (dst.mask & TGSI_WRITEMASK_Y) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
                          writemask( dst, TGSI_WRITEMASK_Y ),
                          negate( scalar( src( floor_log2 ),
                                          TGSI_SWIZZLE_X ) ) ) )
            return FALSE;

         if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
                          writemask( dst, TGSI_WRITEMASK_Y ),
                          src( dst ),
                          abs_src0 ) )
            return FALSE;
      }

      /* Release any temps that were allocated above (a no-op when they
       * alias dst).
       */
      if (!(dst.mask & TGSI_WRITEMASK_X))
         release_temp( emit, floor_log2 );

      if (!(dst.mask & TGSI_WRITEMASK_Z))
         release_temp( emit, log2_abs );
   }

   if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
       src0.base.srcMod != SVGA3DSRCMOD_ABS)
      release_temp( emit, abs_tmp );

   /* If w is being written, fill it with one.
    */
   if (dst.mask & TGSI_WRITEMASK_W) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                       writemask(dst, TGSI_WRITEMASK_W),
                       get_one_immediate(emit)))
         return FALSE;
   }

   return TRUE;
}


/**
 * Translate TGSI TRUNC or ROUND instruction.
 * We need to truncate toward zero. Ex: trunc(-1.9) = -1
 * Different approaches are needed for VS versus PS.
 */
static boolean
emit_trunc_round(struct svga_shader_emitter *emit,
                 const struct tgsi_full_instruction *insn,
                 boolean round)
{
   SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0);
   const struct src_register src0 =
      translate_src_register(emit, &insn->Src[0] );
   SVGA3dShaderDestToken t1 = get_temp(emit);

   if (round) {
      SVGA3dShaderDestToken t0 = get_temp(emit);
      struct src_register half = get_half_immediate(emit);

      /* t0 = abs(src0) + 0.5 */
      if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0,
                      absolute(src0), half))
         return FALSE;

      /* t1 = fract(t0) */
      if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0)))
         return FALSE;

      /* t1 = t0 - t1  (i.e. t1 = floor(abs(src0) + 0.5)) */
      if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0),
                      negate(src(t1))))
         return FALSE;
   }
   else {
      /* trunc */

      /* t1 = fract(abs(src0)) */
      if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0)))
         return FALSE;

      /* t1 = abs(src0) - t1  (i.e. t1 = floor(abs(src0))) */
      if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0),
                      negate(src(t1))))
         return FALSE;
   }

   /*
    * Now we need to multiply t1 by the sign of the original value.
    */
   if (emit->unit == PIPE_SHADER_VERTEX) {
      /* For VS: use SGN instruction */
      /* Need two extra/dummy registers: */
      SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit),
         t4 = get_temp(emit);

      /* t2 = sign(src0) */
      if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0,
                      src(t3), src(t4)))
         return FALSE;

      /* dst = t1 * t2 */
      if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2)))
         return FALSE;
   }
   else {
      /* For FS: Use CMP instruction, selecting +t1 or -t1 by the sign
       * of src0.
       */
      return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst,
                        src0, src(t1), negate(src(t1)));
   }

   return TRUE;
}


/**
 * Translate/emit "begin subroutine" instruction/marker/label.
 */
static boolean
emit_bgnsub(struct svga_shader_emitter *emit,
            unsigned position,
            const struct tgsi_full_instruction *insn)
{
   unsigned i;

   /* Note that we've finished the main function and are now emitting
    * subroutines.  This affects how we terminate the generated
    * shader.
    */
   emit->in_main_func = FALSE;

   /* The label for this position should already have been registered
    * by a preceding CAL instruction (see emit_call()).
    */
   for (i = 0; i < emit->nr_labels; i++) {
      if (emit->label[i] == position) {
         return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
                 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
                 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
      }
   }

   assert(0);
   return TRUE;
}


/**
 * Translate/emit subroutine call instruction.
2718 */ 2719 static boolean 2720 emit_call(struct svga_shader_emitter *emit, 2721 const struct tgsi_full_instruction *insn) 2722 { 2723 unsigned position = insn->Label.Label; 2724 unsigned i; 2725 2726 for (i = 0; i < emit->nr_labels; i++) { 2727 if (emit->label[i] == position) 2728 break; 2729 } 2730 2731 if (emit->nr_labels == ARRAY_SIZE(emit->label)) 2732 return FALSE; 2733 2734 if (i == emit->nr_labels) { 2735 emit->label[i] = position; 2736 emit->nr_labels++; 2737 } 2738 2739 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) && 2740 emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); 2741 } 2742 2743 2744 /** 2745 * Called at the end of the shader. Actually, emit special "fix-up" 2746 * code for the vertex/fragment shader. 2747 */ 2748 static boolean 2749 emit_end(struct svga_shader_emitter *emit) 2750 { 2751 if (emit->unit == PIPE_SHADER_VERTEX) { 2752 return emit_vs_postamble( emit ); 2753 } 2754 else { 2755 return emit_ps_postamble( emit ); 2756 } 2757 } 2758 2759 2760 /** 2761 * Translate any TGSI instruction to SVGA. 
 */
static boolean
svga_emit_instruction(struct svga_shader_emitter *emit,
                      unsigned position,
                      const struct tgsi_full_instruction *insn)
{
   switch (insn->Instruction.Opcode) {

   case TGSI_OPCODE_ARL:
      return emit_arl( emit, insn );

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXD:
      return emit_tex( emit, insn );

   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
      return emit_deriv( emit, insn );

   case TGSI_OPCODE_BGNSUB:
      return emit_bgnsub( emit, position, insn );

   case TGSI_OPCODE_ENDSUB:
      /* nothing to emit -- RET/LABEL bookkeeping is done in emit_bgnsub */
      return TRUE;

   case TGSI_OPCODE_CAL:
      return emit_call( emit, insn );

   case TGSI_OPCODE_FLR:
      return emit_floor( emit, insn );

   case TGSI_OPCODE_TRUNC:
      return emit_trunc_round( emit, insn, FALSE );

   case TGSI_OPCODE_ROUND:
      return emit_trunc_round( emit, insn, TRUE );

   case TGSI_OPCODE_CEIL:
      return emit_ceil( emit, insn );

   case TGSI_OPCODE_CMP:
      return emit_cmp( emit, insn );

   case TGSI_OPCODE_DIV:
      return emit_div( emit, insn );

   case TGSI_OPCODE_DP2:
      return emit_dp2( emit, insn );

   case TGSI_OPCODE_COS:
      return emit_cos( emit, insn );

   case TGSI_OPCODE_SIN:
      return emit_sin( emit, insn );

   case TGSI_OPCODE_END:
      /* TGSI always finishes the main func with an END */
      return emit_end( emit );

   case TGSI_OPCODE_KILL_IF:
      return emit_kill_if( emit, insn );

      /* Selection opcodes.  The underlying language is fairly
       * non-orthogonal about these.
       */
   case TGSI_OPCODE_SEQ:
      return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );

   case TGSI_OPCODE_SNE:
      return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );

   case TGSI_OPCODE_SGT:
      return emit_select_op( emit, PIPE_FUNC_GREATER, insn );

   case TGSI_OPCODE_SGE:
      return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );

   case TGSI_OPCODE_SLT:
      return emit_select_op( emit, PIPE_FUNC_LESS, insn );

   case TGSI_OPCODE_SLE:
      return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );

   case TGSI_OPCODE_POW:
      return emit_pow( emit, insn );

   case TGSI_OPCODE_EX2:
      return emit_ex2( emit, insn );

   case TGSI_OPCODE_EXP:
      return emit_exp( emit, insn );

   case TGSI_OPCODE_LOG:
      return emit_log( emit, insn );

   case TGSI_OPCODE_LG2:
      return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );

   case TGSI_OPCODE_RSQ:
      return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );

   case TGSI_OPCODE_RCP:
      return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );

   case TGSI_OPCODE_CONT:
      /* not expected (we return PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED = 0) */
      return FALSE;

   case TGSI_OPCODE_RET:
      /* This is a noop -- we tell mesa that we can't support RET
       * within a function (early return), so this will always be
       * followed by an ENDSUB.
       */
      return TRUE;

      /* These aren't actually used by any of the frontends we care
       * about:
       */
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_XOR:
      return FALSE;

   case TGSI_OPCODE_IF:
      return emit_if( emit, insn );
   case TGSI_OPCODE_ELSE:
      return emit_else( emit, insn );
   case TGSI_OPCODE_ENDIF:
      return emit_endif( emit, insn );

   case TGSI_OPCODE_BGNLOOP:
      return emit_bgnloop( emit, insn );
   case TGSI_OPCODE_ENDLOOP:
      return emit_endloop( emit, insn );
   case TGSI_OPCODE_BRK:
      return emit_brk( emit, insn );

   case TGSI_OPCODE_KILL:
      return emit_kill( emit, insn );

   case TGSI_OPCODE_DST:
      return emit_dst_insn( emit, insn );

   case TGSI_OPCODE_LIT:
      return emit_lit( emit, insn );

   case TGSI_OPCODE_LRP:
      return emit_lrp( emit, insn );

   case TGSI_OPCODE_SSG:
      return emit_ssg( emit, insn );

   case TGSI_OPCODE_MOV:
      return emit_mov( emit, insn );

   default:
      {
         /* Fall back to a 1:1 opcode translation for everything else */
         unsigned opcode = translate_opcode(insn->Instruction.Opcode);

         if (opcode == SVGA3DOP_LAST_INST)
            return FALSE;

         if (!emit_simple_instruction( emit, opcode, insn ))
            return FALSE;
      }
   }

   return TRUE;
}


/**
 * Translate/emit a TGSI IMMEDIATE declaration.
 * An immediate vector is a constant that's hard-coded into the shader.
 */
static boolean
svga_emit_immediate(struct svga_shader_emitter *emit,
                    const struct tgsi_full_immediate *imm)
{
   static const float id[4] = {0,0,0,1};
   float value[4];
   unsigned i;

   assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
   for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
      /* Squash infs/NaNs to zero rather than passing them to the hw
       * constant file.
       */
      float f = imm->u[i].Float;
      value[i] = util_is_inf_or_nan(f) ? 0.0f : f;
   }

   /* If the immediate has less than four values, fill in the remaining
    * positions from id={0,0,0,1}.
    */
   for ( ; i < 4; i++ )
      value[i] = id[i];

   return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
                          emit->imm_start + emit->internal_imm_count++,
                          value[0], value[1], value[2], value[3]);
}


/**
 * Define a hardware float constant and return a src register for it.
 * \param out  receives the SVGA3DREG_CONST source register
 */
static boolean
make_immediate(struct svga_shader_emitter *emit,
               float a, float b, float c, float d,
               struct src_register *out )
{
   unsigned idx = emit->nr_hw_float_const++;

   if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
                        idx, a, b, c, d ))
      return FALSE;

   *out = src_register( SVGA3DREG_CONST, idx );

   return TRUE;
}


/**
 * Emit special VS instructions at top of shader.
 */
static boolean
emit_vs_preamble(struct svga_shader_emitter *emit)
{
   if (!emit->key.vs.need_prescale) {
      /* {0,0,.5,.5} is used by the clip-space fixup in the VS postamble */
      if (!make_immediate( emit, 0, 0, .5, .5,
                           &emit->imm_0055))
         return FALSE;
   }

   return TRUE;
}


/**
 * Emit special PS instructions at top of shader.
 */
static boolean
emit_ps_preamble(struct svga_shader_emitter *emit)
{
   if (emit->ps_reads_pos && emit->info.reads_z) {
      /*
       * Assemble the position from various bits of inputs. Depth and W are
       * passed in a texcoord this is due to D3D's vPos not hold Z or W.
       * Also fixup the perspective interpolation.
       *
       * temp_pos.xy = vPos.xy
       * temp_pos.w = rcp(texcoord1.w);
       * temp_pos.z = texcoord1.z * temp_pos.w;
       */
      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_MOV),
                       writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ),
                       emit->ps_true_pos ))
         return FALSE;

      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_RCP),
                       writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ),
                       scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) ))
         return FALSE;

      if (!submit_op2( emit,
                       inst_token(SVGA3DOP_MUL),
                       writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ),
                       scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ),
                       scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) ))
         return FALSE;
   }

   return TRUE;
}


/**
 * Emit special PS instructions at end of shader.
 */
static boolean
emit_ps_postamble(struct svga_shader_emitter *emit)
{
   unsigned i;

   /* PS oDepth is incredibly fragile and it's very hard to catch the
    * types of usage that break it during shader emit.  Easier just to
    * redirect the main program to a temporary and then only touch
    * oDepth with a hand-crafted MOV below.
    */
   if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_MOV),
                       emit->true_pos,
                       scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
         return FALSE;
   }

   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
      if (SVGA3dShaderGetRegType(emit->true_color_output[i].value) != 0) {
         /* Potentially override output colors with white for XOR
          * logicop workaround.
          */
         if (emit->unit == PIPE_SHADER_FRAGMENT &&
             emit->key.fs.white_fragments) {
            struct src_register one = get_one_immediate(emit);

            if (!submit_op1( emit,
                             inst_token(SVGA3DOP_MOV),
                             emit->true_color_output[i],
                             one ))
               return FALSE;
         }
         else if (emit->unit == PIPE_SHADER_FRAGMENT &&
                  i < emit->key.fs.write_color0_to_n_cbufs) {
            /* Write temp color output [0] to true output [i] */
            if (!submit_op1(emit, inst_token(SVGA3DOP_MOV),
                            emit->true_color_output[i],
                            src(emit->temp_color_output[0]))) {
               return FALSE;
            }
         }
         else {
            if (!submit_op1( emit,
                             inst_token(SVGA3DOP_MOV),
                             emit->true_color_output[i],
                             src(emit->temp_color_output[i]) ))
               return FALSE;
         }
      }
   }

   return TRUE;
}


/**
 * Emit special VS instructions at end of shader.
 */
static boolean
emit_vs_postamble(struct svga_shader_emitter *emit)
{
   /* PSIZ output is incredibly fragile and it's very hard to catch
    * the types of usage that break it during shader emit.  Easier
    * just to redirect the main program to a temporary and then only
    * touch PSIZ with a hand-crafted MOV below.
    */
   if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_MOV),
                       emit->true_psiz,
                       scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
         return FALSE;
   }

   /* Need to perform various manipulations on vertex position to cope
    * with the different GL and D3D clip spaces.
    */
   if (emit->key.vs.need_prescale) {
      SVGA3dShaderDestToken temp_pos = emit->temp_pos;
      SVGA3dShaderDestToken depth = emit->depth_pos;
      SVGA3dShaderDestToken pos = emit->true_pos;
      /* prescale constants live just past the app-visible constants */
      unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
      struct src_register prescale_scale = src_register( SVGA3DREG_CONST,
                                                         offset + 0 );
      struct src_register prescale_trans = src_register( SVGA3DREG_CONST,
                                                         offset + 1 );

      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_MOV),
                       writemask(depth, TGSI_WRITEMASK_W),
                       scalar(src(temp_pos), TGSI_SWIZZLE_W) ))
         return FALSE;

      /* MUL temp_pos.xyz,    temp_pos,      prescale.scale
       * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
       *   --> Note that prescale.trans.w == 0
       */
      if (!submit_op2( emit,
                       inst_token(SVGA3DOP_MUL),
                       writemask(temp_pos, TGSI_WRITEMASK_XYZ),
                       src(temp_pos),
                       prescale_scale ))
         return FALSE;

      if (!submit_op3( emit,
                       inst_token(SVGA3DOP_MAD),
                       pos,
                       swizzle(src(temp_pos), 3, 3, 3, 3),
                       prescale_trans,
                       src(temp_pos)))
         return FALSE;

      /* Also write to depth value */
      if (!submit_op3( emit,
                       inst_token(SVGA3DOP_MAD),
                       writemask(depth, TGSI_WRITEMASK_Z),
                       swizzle(src(temp_pos), 3, 3, 3, 3),
                       prescale_trans,
                       src(temp_pos) ))
         return FALSE;
   }
   else {
      SVGA3dShaderDestToken temp_pos = emit->temp_pos;
      SVGA3dShaderDestToken depth = emit->depth_pos;
      SVGA3dShaderDestToken pos = emit->true_pos;
      struct src_register imm_0055 = emit->imm_0055;

      /* Adjust GL clipping coordinate space to hardware (D3D-style):
       *
       * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
       * MOV result.position, temp_pos
       */
      if (!submit_op2( emit,
                       inst_token(SVGA3DOP_DP4),
                       writemask(temp_pos, TGSI_WRITEMASK_Z),
                       imm_0055,
                       src(temp_pos) ))
         return FALSE;

      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_MOV),
                       pos,
                       src(temp_pos) ))
         return FALSE;

      /* Move the manipulated depth into the extra texcoord reg */
      if (!submit_op1( emit,
                       inst_token(SVGA3DOP_MOV),
                       writemask(depth, TGSI_WRITEMASK_ZW),
                       src(temp_pos) ))
         return FALSE;
   }

   return TRUE;
}


/**
 * For the pixel shader: emit the code which chooses the front
 * or back face color depending on triangle orientation.
 * This happens at the top of the fragment shader.
 *
 *  0: IF VFACE :4
 *  1:   COLOR = FrontColor;
 *  2: ELSE
 *  3:   COLOR = BackColor;
 *  4: ENDIF
 */
static boolean
emit_light_twoside(struct svga_shader_emitter *emit)
{
   struct src_register vface, zero;
   struct src_register front[2];
   struct src_register back[2];
   SVGA3dShaderDestToken color[2];
   int count = emit->internal_color_count;
   unsigned i;
   SVGA3dShaderInstToken if_token;

   if (count == 0)
      return TRUE;

   vface = get_vface( emit );
   zero = get_zero_immediate(emit);

   /* Can't use get_temp() to allocate the color reg as such
    * temporaries will be reclaimed after each instruction by the call
    * to reset_temp_regs().
    */
   for (i = 0; i < count; i++) {
      color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ );
      front[i] = emit->input_map[emit->internal_color_idx[i]];

      /* Back is always the next input:
       */
      back[i] = front[i];
      back[i].base.num = front[i].base.num + 1;

      /* Reassign the input_map to the actual front-face color:
       */
      emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
   }

   if_token = inst_token( SVGA3DOP_IFC );

   /* front_ccw flips the sense of the VFACE comparison: */
   if (emit->key.fs.front_ccw)
      if_token.control = SVGA3DOPCOMP_LT;
   else
      if_token.control = SVGA3DOPCOMP_GT;

   if (!(emit_instruction( emit, if_token ) &&
         emit_src( emit, vface ) &&
         emit_src( emit, zero ) ))
      return FALSE;

   for (i = 0; i < count; i++) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
         return FALSE;
   }

   if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
      return FALSE;

   for (i = 0; i < count; i++) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
         return FALSE;
   }

   if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
      return FALSE;

   return TRUE;
}


/**
 * Emit special setup code for the front/back face register in the FS.
 *  0: SETP_GT TEMP, VFACE, 0
 * where TEMP is a fake frontface register
 */
static boolean
emit_frontface(struct svga_shader_emitter *emit)
{
   struct src_register vface;
   SVGA3dShaderDestToken temp;
   struct src_register pass, fail;

   vface = get_vface( emit );

   /* Can't use get_temp() to allocate the fake frontface reg as such
    * temporaries will be reclaimed after each instruction by the call
    * to reset_temp_regs().
    */
   temp = dst_register( SVGA3DREG_TEMP,
                        emit->nr_hw_temp++ );

   if (emit->key.fs.front_ccw) {
      pass = get_zero_immediate(emit);
      fail = get_one_immediate(emit);
   } else {
      pass = get_one_immediate(emit);
      fail = get_zero_immediate(emit);
   }

   /* temp = (vface > 0) ? pass : fail */
   if (!emit_conditional(emit, PIPE_FUNC_GREATER,
                         temp, vface, get_zero_immediate(emit),
                         pass, fail))
      return FALSE;

   /* Reassign the input_map to the actual front-face color:
    */
   emit->input_map[emit->internal_frontface_idx] = src(temp);

   return TRUE;
}


/**
 * Emit code to invert the T component of the incoming texture coordinate.
 * This is used for drawing point sprites when
 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT.
 */
static boolean
emit_inverted_texcoords(struct svga_shader_emitter *emit)
{
   unsigned inverted_texcoords = emit->inverted_texcoords;

   while (inverted_texcoords) {
      const unsigned unit = ffs(inverted_texcoords) - 1;

      assert(emit->inverted_texcoords & (1 << unit));

      assert(unit < ARRAY_SIZE(emit->ps_true_texcoord));

      assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input));

      assert(emit->ps_inverted_texcoord_input[unit]
             < ARRAY_SIZE(emit->input_map));

      /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */
      if (!submit_op3(emit,
                      inst_token(SVGA3DOP_MAD),
                      dst(emit->ps_inverted_texcoord[unit]),
                      emit->ps_true_texcoord[unit],
                      get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f),
                      get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f)))
         return FALSE;

      /* Reassign the input_map entry to the new texcoord register */
      emit->input_map[emit->ps_inverted_texcoord_input[unit]] =
         emit->ps_inverted_texcoord[unit];

      inverted_texcoords &= ~(1 << unit);
   }

   return TRUE;
}


/**
 * Emit code to adjust vertex
 * shader inputs/attributes:
 * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs).
 * - Set attrib W component = 1.
 */
static boolean
emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit)
{
   unsigned adjust_mask = (emit->key.vs.adjust_attrib_range |
                           emit->key.vs.adjust_attrib_w_1);

   while (adjust_mask) {
      /* Adjust vertex attrib range and/or set W component = 1 */
      const unsigned index = u_bit_scan(&adjust_mask);
      struct src_register tmp;

      /* allocate a temp reg */
      tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp);
      emit->nr_hw_temp++;

      if (emit->key.vs.adjust_attrib_range & (1 << index)) {
         /* The vertex input/attribute is supposed to be a signed value in
          * the range [-1,1] but we actually fetched/converted it to the
          * range [0,1].  This most likely happens when the app specifies a
          * signed byte attribute but we interpreted it as unsigned bytes.
          * See also svga_translate_vertex_format().
          *
          * Here, we emit some extra instructions to adjust
          * the attribute values from [0,1] to [-1,1].
          *
          * The adjustment we implement is:
          *   new_attrib = attrib * 2.0;
          *   if (attrib >= 0.5)
          *      new_attrib = new_attrib - 2.0;
          * This isn't exactly right (it's off by a bit or so) but close
          * enough.
          */
         SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0);

         /* tmp = attrib * 2.0 */
         if (!submit_op2(emit,
                         inst_token(SVGA3DOP_MUL),
                         dst(tmp),
                         emit->input_map[index],
                         get_two_immediate(emit)))
            return FALSE;

         /* pred = (attrib >= 0.5) */
         if (!submit_op2(emit,
                         inst_token_setp(SVGA3DOPCOMP_GE),
                         pred_reg,
                         emit->input_map[index],   /* vert attrib */
                         get_half_immediate(emit)))  /* 0.5 */
            return FALSE;

         /* sub(pred) tmp, tmp, 2.0 */
         if (!submit_op3(emit,
                         inst_token_predicated(SVGA3DOP_SUB),
                         dst(tmp),
                         src(pred_reg),
                         tmp,
                         get_two_immediate(emit)))
            return FALSE;
      }
      else {
         /* just copy the vertex input attrib to the temp register */
         if (!submit_op1(emit,
                         inst_token(SVGA3DOP_MOV),
                         dst(tmp),
                         emit->input_map[index]))
            return FALSE;
      }

      if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) {
         /* move 1 into W position of tmp */
         if (!submit_op1(emit,
                         inst_token(SVGA3DOP_MOV),
                         writemask(dst(tmp), TGSI_WRITEMASK_W),
                         get_one_immediate(emit)))
            return FALSE;
      }

      /* Reassign the input_map entry to the new tmp register */
      emit->input_map[index] = tmp;
   }

   return TRUE;
}


/**
 * Determine if we need to create the "common" immediate value which is
 * used for generating useful vector constants such as {0,0,0,0} and
 * {1,1,1,1}.
 * We could just do this all the time except that we want to conserve
 * registers whenever possible.
3461 */ 3462 static boolean 3463 needs_to_create_common_immediate(const struct svga_shader_emitter *emit) 3464 { 3465 unsigned i; 3466 3467 if (emit->unit == PIPE_SHADER_FRAGMENT) { 3468 if (emit->key.fs.light_twoside) 3469 return TRUE; 3470 3471 if (emit->key.fs.white_fragments) 3472 return TRUE; 3473 3474 if (emit->emit_frontface) 3475 return TRUE; 3476 3477 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 || 3478 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 || 3479 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1) 3480 return TRUE; 3481 3482 if (emit->inverted_texcoords) 3483 return TRUE; 3484 3485 /* look for any PIPE_SWIZZLE_0/ONE terms */ 3486 for (i = 0; i < emit->key.num_textures; i++) { 3487 if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W || 3488 emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W || 3489 emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W || 3490 emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W) 3491 return TRUE; 3492 } 3493 3494 for (i = 0; i < emit->key.num_textures; i++) { 3495 if (emit->key.tex[i].compare_mode 3496 == PIPE_TEX_COMPARE_R_TO_TEXTURE) 3497 return TRUE; 3498 } 3499 } 3500 else if (emit->unit == PIPE_SHADER_VERTEX) { 3501 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) 3502 return TRUE; 3503 if (emit->key.vs.adjust_attrib_range || 3504 emit->key.vs.adjust_attrib_w_1) 3505 return TRUE; 3506 } 3507 3508 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || 3509 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || 3510 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || 3511 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || 3512 emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 || 3513 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || 3514 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || 3515 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || 3516 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 || 3517 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 || 3518 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 || 3519 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 || 
3520 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 || 3521 emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1) 3522 return TRUE; 3523 3524 return FALSE; 3525 } 3526 3527 3528 /** 3529 * Do we need to create a looping constant? 3530 */ 3531 static boolean 3532 needs_to_create_loop_const(const struct svga_shader_emitter *emit) 3533 { 3534 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1); 3535 } 3536 3537 3538 static boolean 3539 needs_to_create_arl_consts(const struct svga_shader_emitter *emit) 3540 { 3541 return (emit->num_arl_consts > 0); 3542 } 3543 3544 3545 static boolean 3546 pre_parse_add_indirect( struct svga_shader_emitter *emit, 3547 int num, int current_arl) 3548 { 3549 unsigned i; 3550 assert(num < 0); 3551 3552 for (i = 0; i < emit->num_arl_consts; ++i) { 3553 if (emit->arl_consts[i].arl_num == current_arl) 3554 break; 3555 } 3556 /* new entry */ 3557 if (emit->num_arl_consts == i) { 3558 ++emit->num_arl_consts; 3559 } 3560 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ? 
3561 num : 3562 emit->arl_consts[i].number; 3563 emit->arl_consts[i].arl_num = current_arl; 3564 return TRUE; 3565 } 3566 3567 3568 static boolean 3569 pre_parse_instruction( struct svga_shader_emitter *emit, 3570 const struct tgsi_full_instruction *insn, 3571 int current_arl) 3572 { 3573 if (insn->Src[0].Register.Indirect && 3574 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) { 3575 const struct tgsi_full_src_register *reg = &insn->Src[0]; 3576 if (reg->Register.Index < 0) { 3577 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3578 } 3579 } 3580 3581 if (insn->Src[1].Register.Indirect && 3582 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) { 3583 const struct tgsi_full_src_register *reg = &insn->Src[1]; 3584 if (reg->Register.Index < 0) { 3585 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3586 } 3587 } 3588 3589 if (insn->Src[2].Register.Indirect && 3590 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) { 3591 const struct tgsi_full_src_register *reg = &insn->Src[2]; 3592 if (reg->Register.Index < 0) { 3593 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3594 } 3595 } 3596 3597 return TRUE; 3598 } 3599 3600 3601 static boolean 3602 pre_parse_tokens( struct svga_shader_emitter *emit, 3603 const struct tgsi_token *tokens ) 3604 { 3605 struct tgsi_parse_context parse; 3606 int current_arl = 0; 3607 3608 tgsi_parse_init( &parse, tokens ); 3609 3610 while (!tgsi_parse_end_of_tokens( &parse )) { 3611 tgsi_parse_token( &parse ); 3612 switch (parse.FullToken.Token.Type) { 3613 case TGSI_TOKEN_TYPE_IMMEDIATE: 3614 case TGSI_TOKEN_TYPE_DECLARATION: 3615 break; 3616 case TGSI_TOKEN_TYPE_INSTRUCTION: 3617 if (parse.FullToken.FullInstruction.Instruction.Opcode == 3618 TGSI_OPCODE_ARL) { 3619 ++current_arl; 3620 } 3621 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction, 3622 current_arl )) 3623 return FALSE; 3624 break; 3625 default: 3626 break; 3627 } 3628 3629 } 3630 return TRUE; 3631 } 3632 3633 3634 static 
boolean 3635 svga_shader_emit_helpers(struct svga_shader_emitter *emit) 3636 { 3637 if (needs_to_create_common_immediate( emit )) { 3638 create_common_immediate( emit ); 3639 } 3640 if (needs_to_create_loop_const( emit )) { 3641 create_loop_const( emit ); 3642 } 3643 if (needs_to_create_arl_consts( emit )) { 3644 create_arl_consts( emit ); 3645 } 3646 3647 if (emit->unit == PIPE_SHADER_FRAGMENT) { 3648 if (!svga_shader_emit_samplers_decl( emit )) 3649 return FALSE; 3650 3651 if (!emit_ps_preamble( emit )) 3652 return FALSE; 3653 3654 if (emit->key.fs.light_twoside) { 3655 if (!emit_light_twoside( emit )) 3656 return FALSE; 3657 } 3658 if (emit->emit_frontface) { 3659 if (!emit_frontface( emit )) 3660 return FALSE; 3661 } 3662 if (emit->inverted_texcoords) { 3663 if (!emit_inverted_texcoords( emit )) 3664 return FALSE; 3665 } 3666 } 3667 else { 3668 assert(emit->unit == PIPE_SHADER_VERTEX); 3669 if (emit->key.vs.adjust_attrib_range) { 3670 if (!emit_adjusted_vertex_attribs(emit) || 3671 emit->key.vs.adjust_attrib_w_1) { 3672 return FALSE; 3673 } 3674 } 3675 } 3676 3677 return TRUE; 3678 } 3679 3680 3681 /** 3682 * This is the main entrypoint into the TGSI instruction translater. 3683 * Translate TGSI shader tokens into an SVGA shader. 
3684 */ 3685 boolean 3686 svga_shader_emit_instructions(struct svga_shader_emitter *emit, 3687 const struct tgsi_token *tokens) 3688 { 3689 struct tgsi_parse_context parse; 3690 const struct tgsi_token *new_tokens = NULL; 3691 boolean ret = TRUE; 3692 boolean helpers_emitted = FALSE; 3693 unsigned line_nr = 0; 3694 3695 if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) { 3696 unsigned unit; 3697 3698 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, 3699 TGSI_FILE_INPUT); 3700 3701 if (new_tokens) { 3702 /* Setup texture state for stipple */ 3703 emit->sampler_target[unit] = TGSI_TEXTURE_2D; 3704 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; 3705 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; 3706 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; 3707 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; 3708 3709 emit->pstipple_sampler_unit = unit; 3710 3711 tokens = new_tokens; 3712 } 3713 } 3714 3715 tgsi_parse_init( &parse, tokens ); 3716 emit->internal_imm_count = 0; 3717 3718 if (emit->unit == PIPE_SHADER_VERTEX) { 3719 ret = emit_vs_preamble( emit ); 3720 if (!ret) 3721 goto done; 3722 } 3723 3724 pre_parse_tokens(emit, tokens); 3725 3726 while (!tgsi_parse_end_of_tokens( &parse )) { 3727 tgsi_parse_token( &parse ); 3728 3729 switch (parse.FullToken.Token.Type) { 3730 case TGSI_TOKEN_TYPE_IMMEDIATE: 3731 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate ); 3732 if (!ret) 3733 goto done; 3734 break; 3735 3736 case TGSI_TOKEN_TYPE_DECLARATION: 3737 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration ); 3738 if (!ret) 3739 goto done; 3740 break; 3741 3742 case TGSI_TOKEN_TYPE_INSTRUCTION: 3743 if (!helpers_emitted) { 3744 if (!svga_shader_emit_helpers( emit )) 3745 goto done; 3746 helpers_emitted = TRUE; 3747 } 3748 ret = svga_emit_instruction( emit, 3749 line_nr++, 3750 &parse.FullToken.FullInstruction ); 3751 if (!ret) 3752 goto done; 3753 break; 3754 default: 3755 break; 3756 } 3757 3758 
reset_temp_regs( emit ); 3759 } 3760 3761 /* Need to terminate the current subroutine. Note that the 3762 * hardware doesn't tolerate shaders without sub-routines 3763 * terminating with RET+END. 3764 */ 3765 if (!emit->in_main_func) { 3766 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) ); 3767 if (!ret) 3768 goto done; 3769 } 3770 3771 assert(emit->dynamic_branching_level == 0); 3772 3773 /* Need to terminate the whole shader: 3774 */ 3775 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) ); 3776 if (!ret) 3777 goto done; 3778 3779 done: 3780 tgsi_parse_free( &parse ); 3781 if (new_tokens) { 3782 tgsi_free_tokens(new_tokens); 3783 } 3784 3785 return ret; 3786 } 3787