1 /********************************************************** 2 * Copyright 2008-2009 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
23 * 24 **********************************************************/ 25 26 27 #include "pipe/p_shader_tokens.h" 28 #include "tgsi/tgsi_dump.h" 29 #include "tgsi/tgsi_parse.h" 30 #include "util/u_memory.h" 31 #include "util/u_math.h" 32 33 #include "svga_tgsi_emit.h" 34 #include "svga_context.h" 35 36 37 static boolean emit_vs_postamble( struct svga_shader_emitter *emit ); 38 static boolean emit_ps_postamble( struct svga_shader_emitter *emit ); 39 40 41 static unsigned 42 translate_opcode( 43 uint opcode ) 44 { 45 switch (opcode) { 46 case TGSI_OPCODE_ABS: return SVGA3DOP_ABS; 47 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; 48 case TGSI_OPCODE_BREAKC: return SVGA3DOP_BREAKC; 49 case TGSI_OPCODE_DP2A: return SVGA3DOP_DP2ADD; 50 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; 51 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; 52 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; 53 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; 54 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX; 55 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; 56 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV; 57 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; 58 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; 59 case TGSI_OPCODE_NRM4: return SVGA3DOP_NRM; 60 default: 61 debug_printf("Unkown opcode %u\n", opcode); 62 assert( 0 ); 63 return SVGA3DOP_LAST_INST; 64 } 65 } 66 67 68 static unsigned translate_file( unsigned file ) 69 { 70 switch (file) { 71 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP; 72 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT; 73 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */ 74 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST; 75 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST; 76 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER; 77 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR; 78 default: 79 assert( 0 ); 80 return SVGA3DREG_TEMP; 81 } 82 } 83 84 85 static SVGA3dShaderDestToken 86 translate_dst_register( struct svga_shader_emitter *emit, 87 const struct tgsi_full_instruction *insn, 88 unsigned idx ) 89 { 90 
const struct tgsi_full_dst_register *reg = &insn->Dst[idx]; 91 SVGA3dShaderDestToken dest; 92 93 switch (reg->Register.File) { 94 case TGSI_FILE_OUTPUT: 95 /* Output registers encode semantic information in their name. 96 * Need to lookup a table built at decl time: 97 */ 98 dest = emit->output_map[reg->Register.Index]; 99 break; 100 101 default: 102 { 103 unsigned index = reg->Register.Index; 104 assert(index < SVGA3D_TEMPREG_MAX); 105 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1); 106 dest = dst_register(translate_file(reg->Register.File), index); 107 } 108 break; 109 } 110 111 dest.mask = reg->Register.WriteMask; 112 assert(dest.mask); 113 114 if (insn->Instruction.Saturate) 115 dest.dstMod = SVGA3DDSTMOD_SATURATE; 116 117 return dest; 118 } 119 120 121 static struct src_register 122 swizzle( struct src_register src, 123 int x, 124 int y, 125 int z, 126 int w ) 127 { 128 x = (src.base.swizzle >> (x * 2)) & 0x3; 129 y = (src.base.swizzle >> (y * 2)) & 0x3; 130 z = (src.base.swizzle >> (z * 2)) & 0x3; 131 w = (src.base.swizzle >> (w * 2)) & 0x3; 132 133 src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w); 134 135 return src; 136 } 137 138 static struct src_register 139 scalar( struct src_register src, 140 int comp ) 141 { 142 return swizzle( src, comp, comp, comp, comp ); 143 } 144 145 static INLINE boolean 146 svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) 147 { 148 int i; 149 150 for (i = 0; i < emit->num_arl_consts; ++i) { 151 if (emit->arl_consts[i].arl_num == emit->current_arl) 152 return TRUE; 153 } 154 return FALSE; 155 } 156 157 static INLINE int 158 svga_arl_adjustment( const struct svga_shader_emitter *emit ) 159 { 160 int i; 161 162 for (i = 0; i < emit->num_arl_consts; ++i) { 163 if (emit->arl_consts[i].arl_num == emit->current_arl) 164 return emit->arl_consts[i].number; 165 } 166 return 0; 167 } 168 169 static struct src_register 170 translate_src_register( const struct svga_shader_emitter *emit, 171 const struct tgsi_full_src_register 
*reg ) 172 { 173 struct src_register src; 174 175 switch (reg->Register.File) { 176 case TGSI_FILE_INPUT: 177 /* Input registers are referred to by their semantic name rather 178 * than by index. Use the mapping build up from the decls: 179 */ 180 src = emit->input_map[reg->Register.Index]; 181 break; 182 183 case TGSI_FILE_IMMEDIATE: 184 /* Immediates are appended after TGSI constants in the D3D 185 * constant buffer. 186 */ 187 src = src_register( translate_file( reg->Register.File ), 188 reg->Register.Index + emit->imm_start ); 189 break; 190 191 default: 192 src = src_register( translate_file( reg->Register.File ), 193 reg->Register.Index ); 194 195 break; 196 } 197 198 /* Indirect addressing. 199 */ 200 if (reg->Register.Indirect) { 201 if (emit->unit == PIPE_SHADER_FRAGMENT) { 202 /* Pixel shaders have only loop registers for relative 203 * addressing into inputs. Ignore the redundant address 204 * register, the contents of aL should be in sync with it. 205 */ 206 if (reg->Register.File == TGSI_FILE_INPUT) { 207 src.base.relAddr = 1; 208 src.indirect = src_token(SVGA3DREG_LOOP, 0); 209 } 210 } 211 else { 212 /* Constant buffers only. 213 */ 214 if (reg->Register.File == TGSI_FILE_CONSTANT) { 215 /* we shift the offset towards the minimum */ 216 if (svga_arl_needs_adjustment( emit )) { 217 src.base.num -= svga_arl_adjustment( emit ); 218 } 219 src.base.relAddr = 1; 220 221 /* Not really sure what should go in the second token: 222 */ 223 src.indirect = src_token( SVGA3DREG_ADDR, 224 reg->Indirect.Index ); 225 226 src.indirect.swizzle = SWIZZLE_XXXX; 227 } 228 } 229 } 230 231 src = swizzle( src, 232 reg->Register.SwizzleX, 233 reg->Register.SwizzleY, 234 reg->Register.SwizzleZ, 235 reg->Register.SwizzleW ); 236 237 /* src.mod isn't a bitfield, unfortunately: 238 * See tgsi_util_get_full_src_register_sign_mode for implementation details. 
239 */ 240 if (reg->Register.Absolute) { 241 if (reg->Register.Negate) 242 src.base.srcMod = SVGA3DSRCMOD_ABSNEG; 243 else 244 src.base.srcMod = SVGA3DSRCMOD_ABS; 245 } 246 else { 247 if (reg->Register.Negate) 248 src.base.srcMod = SVGA3DSRCMOD_NEG; 249 else 250 src.base.srcMod = SVGA3DSRCMOD_NONE; 251 } 252 253 return src; 254 } 255 256 257 /* 258 * Get a temporary register. 259 * Note: if we exceed the temporary register limit we just use 260 * register SVGA3D_TEMPREG_MAX - 1. 261 */ 262 static INLINE SVGA3dShaderDestToken 263 get_temp( struct svga_shader_emitter *emit ) 264 { 265 int i = emit->nr_hw_temp + emit->internal_temp_count++; 266 assert(i < SVGA3D_TEMPREG_MAX); 267 i = MIN2(i, SVGA3D_TEMPREG_MAX - 1); 268 return dst_register( SVGA3DREG_TEMP, i ); 269 } 270 271 /* Release a single temp. Currently only effective if it was the last 272 * allocated temp, otherwise release will be delayed until the next 273 * call to reset_temp_regs(). 274 */ 275 static INLINE void 276 release_temp( struct svga_shader_emitter *emit, 277 SVGA3dShaderDestToken temp ) 278 { 279 if (temp.num == emit->internal_temp_count - 1) 280 emit->internal_temp_count--; 281 } 282 283 static void reset_temp_regs( struct svga_shader_emitter *emit ) 284 { 285 emit->internal_temp_count = 0; 286 } 287 288 289 /* Replace the src with the temporary specified in the dst, but copying 290 * only the necessary channels, and preserving the original swizzle (which is 291 * important given that several opcodes have constraints in the allowed 292 * swizzles). 
293 */ 294 static boolean emit_repl( struct svga_shader_emitter *emit, 295 SVGA3dShaderDestToken dst, 296 struct src_register *src0) 297 { 298 unsigned src0_swizzle; 299 unsigned chan; 300 301 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP); 302 303 src0_swizzle = src0->base.swizzle; 304 305 dst.mask = 0; 306 for (chan = 0; chan < 4; ++chan) { 307 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3; 308 dst.mask |= 1 << swizzle; 309 } 310 assert(dst.mask); 311 312 src0->base.swizzle = SVGA3DSWIZZLE_NONE; 313 314 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 )) 315 return FALSE; 316 317 *src0 = src( dst ); 318 src0->base.swizzle = src0_swizzle; 319 320 return TRUE; 321 } 322 323 324 static boolean submit_op0( struct svga_shader_emitter *emit, 325 SVGA3dShaderInstToken inst, 326 SVGA3dShaderDestToken dest ) 327 { 328 return (emit_instruction( emit, inst ) && 329 emit_dst( emit, dest )); 330 } 331 332 static boolean submit_op1( struct svga_shader_emitter *emit, 333 SVGA3dShaderInstToken inst, 334 SVGA3dShaderDestToken dest, 335 struct src_register src0 ) 336 { 337 return emit_op1( emit, inst, dest, src0 ); 338 } 339 340 341 /* SVGA shaders may not refer to >1 constant register in a single 342 * instruction. This function checks for that usage and inserts a 343 * move to temporary if detected. 344 * 345 * The same applies to input registers -- at most a single input 346 * register may be read by any instruction. 
347 */ 348 static boolean submit_op2( struct svga_shader_emitter *emit, 349 SVGA3dShaderInstToken inst, 350 SVGA3dShaderDestToken dest, 351 struct src_register src0, 352 struct src_register src1 ) 353 { 354 SVGA3dShaderDestToken temp; 355 SVGA3dShaderRegType type0, type1; 356 boolean need_temp = FALSE; 357 358 temp.value = 0; 359 type0 = SVGA3dShaderGetRegType( src0.base.value ); 360 type1 = SVGA3dShaderGetRegType( src1.base.value ); 361 362 if (type0 == SVGA3DREG_CONST && 363 type1 == SVGA3DREG_CONST && 364 src0.base.num != src1.base.num) 365 need_temp = TRUE; 366 367 if (type0 == SVGA3DREG_INPUT && 368 type1 == SVGA3DREG_INPUT && 369 src0.base.num != src1.base.num) 370 need_temp = TRUE; 371 372 if (need_temp) { 373 temp = get_temp( emit ); 374 375 if (!emit_repl( emit, temp, &src0 )) 376 return FALSE; 377 } 378 379 if (!emit_op2( emit, inst, dest, src0, src1 )) 380 return FALSE; 381 382 if (need_temp) 383 release_temp( emit, temp ); 384 385 return TRUE; 386 } 387 388 389 /* SVGA shaders may not refer to >1 constant register in a single 390 * instruction. This function checks for that usage and inserts a 391 * move to temporary if detected. 
392 */ 393 static boolean submit_op3( struct svga_shader_emitter *emit, 394 SVGA3dShaderInstToken inst, 395 SVGA3dShaderDestToken dest, 396 struct src_register src0, 397 struct src_register src1, 398 struct src_register src2 ) 399 { 400 SVGA3dShaderDestToken temp0; 401 SVGA3dShaderDestToken temp1; 402 boolean need_temp0 = FALSE; 403 boolean need_temp1 = FALSE; 404 SVGA3dShaderRegType type0, type1, type2; 405 406 temp0.value = 0; 407 temp1.value = 0; 408 type0 = SVGA3dShaderGetRegType( src0.base.value ); 409 type1 = SVGA3dShaderGetRegType( src1.base.value ); 410 type2 = SVGA3dShaderGetRegType( src2.base.value ); 411 412 if (inst.op != SVGA3DOP_SINCOS) { 413 if (type0 == SVGA3DREG_CONST && 414 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) || 415 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) 416 need_temp0 = TRUE; 417 418 if (type1 == SVGA3DREG_CONST && 419 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num)) 420 need_temp1 = TRUE; 421 } 422 423 if (type0 == SVGA3DREG_INPUT && 424 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) || 425 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) 426 need_temp0 = TRUE; 427 428 if (type1 == SVGA3DREG_INPUT && 429 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num)) 430 need_temp1 = TRUE; 431 432 if (need_temp0) { 433 temp0 = get_temp( emit ); 434 435 if (!emit_repl( emit, temp0, &src0 )) 436 return FALSE; 437 } 438 439 if (need_temp1) { 440 temp1 = get_temp( emit ); 441 442 if (!emit_repl( emit, temp1, &src1 )) 443 return FALSE; 444 } 445 446 if (!emit_op3( emit, inst, dest, src0, src1, src2 )) 447 return FALSE; 448 449 if (need_temp1) 450 release_temp( emit, temp1 ); 451 if (need_temp0) 452 release_temp( emit, temp0 ); 453 return TRUE; 454 } 455 456 457 458 459 /* SVGA shaders may not refer to >1 constant register in a single 460 * instruction. This function checks for that usage and inserts a 461 * move to temporary if detected. 
462 */ 463 static boolean submit_op4( struct svga_shader_emitter *emit, 464 SVGA3dShaderInstToken inst, 465 SVGA3dShaderDestToken dest, 466 struct src_register src0, 467 struct src_register src1, 468 struct src_register src2, 469 struct src_register src3) 470 { 471 SVGA3dShaderDestToken temp0; 472 SVGA3dShaderDestToken temp3; 473 boolean need_temp0 = FALSE; 474 boolean need_temp3 = FALSE; 475 SVGA3dShaderRegType type0, type1, type2, type3; 476 477 temp0.value = 0; 478 temp3.value = 0; 479 type0 = SVGA3dShaderGetRegType( src0.base.value ); 480 type1 = SVGA3dShaderGetRegType( src1.base.value ); 481 type2 = SVGA3dShaderGetRegType( src2.base.value ); 482 type3 = SVGA3dShaderGetRegType( src2.base.value ); 483 484 /* Make life a little easier - this is only used by the TXD 485 * instruction which is guaranteed not to have a constant/input reg 486 * in one slot at least: 487 */ 488 assert(type1 == SVGA3DREG_SAMPLER); 489 490 if (type0 == SVGA3DREG_CONST && 491 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) || 492 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) 493 need_temp0 = TRUE; 494 495 if (type3 == SVGA3DREG_CONST && 496 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num)) 497 need_temp3 = TRUE; 498 499 if (type0 == SVGA3DREG_INPUT && 500 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) || 501 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) 502 need_temp0 = TRUE; 503 504 if (type3 == SVGA3DREG_INPUT && 505 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) 506 need_temp3 = TRUE; 507 508 if (need_temp0) { 509 temp0 = get_temp( emit ); 510 511 if (!emit_repl( emit, temp0, &src0 )) 512 return FALSE; 513 } 514 515 if (need_temp3) { 516 temp3 = get_temp( emit ); 517 518 if (!emit_repl( emit, temp3, &src3 )) 519 return FALSE; 520 } 521 522 if (!emit_op4( emit, inst, dest, src0, src1, src2, src3 )) 523 return FALSE; 524 525 if (need_temp3) 526 release_temp( emit, temp3 ); 527 if (need_temp0) 528 
release_temp( emit, temp0 ); 529 return TRUE; 530 } 531 532 533 static boolean alias_src_dst( struct src_register src, 534 SVGA3dShaderDestToken dst ) 535 { 536 if (src.base.num != dst.num) 537 return FALSE; 538 539 if (SVGA3dShaderGetRegType(dst.value) != 540 SVGA3dShaderGetRegType(src.base.value)) 541 return FALSE; 542 543 return TRUE; 544 } 545 546 547 static boolean submit_lrp(struct svga_shader_emitter *emit, 548 SVGA3dShaderDestToken dst, 549 struct src_register src0, 550 struct src_register src1, 551 struct src_register src2) 552 { 553 SVGA3dShaderDestToken tmp; 554 boolean need_dst_tmp = FALSE; 555 556 /* The dst reg must be a temporary, and not be the same as src0 or src2 */ 557 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || 558 alias_src_dst(src0, dst) || 559 alias_src_dst(src2, dst)) 560 need_dst_tmp = TRUE; 561 562 if (need_dst_tmp) { 563 tmp = get_temp( emit ); 564 tmp.mask = dst.mask; 565 } 566 else { 567 tmp = dst; 568 } 569 570 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) 571 return FALSE; 572 573 if (need_dst_tmp) { 574 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) 575 return FALSE; 576 } 577 578 return TRUE; 579 } 580 581 582 static boolean emit_def_const( struct svga_shader_emitter *emit, 583 SVGA3dShaderConstType type, 584 unsigned idx, 585 float a, 586 float b, 587 float c, 588 float d ) 589 { 590 SVGA3DOpDefArgs def; 591 SVGA3dShaderInstToken opcode; 592 593 switch (type) { 594 case SVGA3D_CONST_TYPE_FLOAT: 595 opcode = inst_token( SVGA3DOP_DEF ); 596 def.dst = dst_register( SVGA3DREG_CONST, idx ); 597 def.constValues[0] = a; 598 def.constValues[1] = b; 599 def.constValues[2] = c; 600 def.constValues[3] = d; 601 break; 602 case SVGA3D_CONST_TYPE_INT: 603 opcode = inst_token( SVGA3DOP_DEFI ); 604 def.dst = dst_register( SVGA3DREG_CONSTINT, idx ); 605 def.constIValues[0] = (int)a; 606 def.constIValues[1] = (int)b; 607 def.constIValues[2] = (int)c; 608 def.constIValues[3] = (int)d; 
609 break; 610 default: 611 assert(0); 612 opcode = inst_token( SVGA3DOP_NOP ); 613 break; 614 } 615 616 if (!emit_instruction(emit, opcode) || 617 !svga_shader_emit_dwords( emit, def.values, Elements(def.values))) 618 return FALSE; 619 620 return TRUE; 621 } 622 623 static INLINE boolean 624 create_zero_immediate( struct svga_shader_emitter *emit ) 625 { 626 unsigned idx = emit->nr_hw_float_const++; 627 628 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate 629 * other useful vectors. 630 */ 631 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 632 idx, 0, 0.5, -1, 1 )) 633 return FALSE; 634 635 emit->zero_immediate_idx = idx; 636 emit->created_zero_immediate = TRUE; 637 638 return TRUE; 639 } 640 641 static INLINE boolean 642 create_loop_const( struct svga_shader_emitter *emit ) 643 { 644 unsigned idx = emit->nr_hw_int_const++; 645 646 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx, 647 255, /* iteration count */ 648 0, /* initial value */ 649 1, /* step size */ 650 0 /* not used, must be 0 */)) 651 return FALSE; 652 653 emit->loop_const_idx = idx; 654 emit->created_loop_const = TRUE; 655 656 return TRUE; 657 } 658 659 static INLINE boolean 660 create_arl_consts( struct svga_shader_emitter *emit ) 661 { 662 int i; 663 664 for (i = 0; i < emit->num_arl_consts; i += 4) { 665 int j; 666 unsigned idx = emit->nr_hw_float_const++; 667 float vals[4]; 668 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) { 669 vals[j] = emit->arl_consts[i + j].number; 670 emit->arl_consts[i + j].idx = idx; 671 switch (j) { 672 case 0: 673 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X; 674 break; 675 case 1: 676 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y; 677 break; 678 case 2: 679 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z; 680 break; 681 case 3: 682 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W; 683 break; 684 } 685 } 686 while (j < 4) 687 vals[j++] = 0; 688 689 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 690 vals[0], 
vals[1], 691 vals[2], vals[3])) 692 return FALSE; 693 } 694 695 return TRUE; 696 } 697 698 static INLINE struct src_register 699 get_vface( struct svga_shader_emitter *emit ) 700 { 701 assert(emit->emitted_vface); 702 return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE); 703 } 704 705 /* returns {0, 0, 0, 1} immediate */ 706 static INLINE struct src_register 707 get_zero_immediate( struct svga_shader_emitter *emit ) 708 { 709 assert(emit->created_zero_immediate); 710 assert(emit->zero_immediate_idx >= 0); 711 return swizzle(src_register( SVGA3DREG_CONST, 712 emit->zero_immediate_idx), 713 0, 0, 0, 3); 714 } 715 716 /* returns {1, 1, 1, -1} immediate */ 717 static INLINE struct src_register 718 get_pos_neg_one_immediate( struct svga_shader_emitter *emit ) 719 { 720 assert(emit->created_zero_immediate); 721 assert(emit->zero_immediate_idx >= 0); 722 return swizzle(src_register( SVGA3DREG_CONST, 723 emit->zero_immediate_idx), 724 3, 3, 3, 2); 725 } 726 727 /* returns {0.5, 0.5, 0.5, 0.5} immediate */ 728 static INLINE struct src_register 729 get_half_immediate( struct svga_shader_emitter *emit ) 730 { 731 assert(emit->created_zero_immediate); 732 assert(emit->zero_immediate_idx >= 0); 733 return swizzle(src_register(SVGA3DREG_CONST, emit->zero_immediate_idx), 734 1, 1, 1, 1); 735 } 736 737 /* returns the loop const */ 738 static INLINE struct src_register 739 get_loop_const( struct svga_shader_emitter *emit ) 740 { 741 assert(emit->created_loop_const); 742 assert(emit->loop_const_idx >= 0); 743 return src_register( SVGA3DREG_CONSTINT, 744 emit->loop_const_idx ); 745 } 746 747 static INLINE struct src_register 748 get_fake_arl_const( struct svga_shader_emitter *emit ) 749 { 750 struct src_register reg; 751 int idx = 0, swizzle = 0, i; 752 753 for (i = 0; i < emit->num_arl_consts; ++ i) { 754 if (emit->arl_consts[i].arl_num == emit->current_arl) { 755 idx = emit->arl_consts[i].idx; 756 swizzle = emit->arl_consts[i].swizzle; 757 } 758 } 759 760 reg = src_register( 
SVGA3DREG_CONST, idx ); 761 return scalar(reg, swizzle); 762 } 763 764 static INLINE struct src_register 765 get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) 766 { 767 int idx; 768 struct src_register reg; 769 770 /* the width/height indexes start right after constants */ 771 idx = emit->key.fkey.tex[sampler_num].width_height_idx + 772 emit->info.file_max[TGSI_FILE_CONSTANT] + 1; 773 774 reg = src_register( SVGA3DREG_CONST, idx ); 775 return reg; 776 } 777 778 static boolean emit_fake_arl(struct svga_shader_emitter *emit, 779 const struct tgsi_full_instruction *insn) 780 { 781 const struct src_register src0 = translate_src_register( 782 emit, &insn->Src[0] ); 783 struct src_register src1 = get_fake_arl_const( emit ); 784 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 785 SVGA3dShaderDestToken tmp = get_temp( emit ); 786 787 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) 788 return FALSE; 789 790 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ), 791 src1)) 792 return FALSE; 793 794 /* replicate the original swizzle */ 795 src1 = src(tmp); 796 src1.base.swizzle = src0.base.swizzle; 797 798 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ), 799 dst, src1 ); 800 } 801 802 static boolean emit_if(struct svga_shader_emitter *emit, 803 const struct tgsi_full_instruction *insn) 804 { 805 struct src_register src0 = translate_src_register( 806 emit, &insn->Src[0] ); 807 struct src_register zero = get_zero_immediate( emit ); 808 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); 809 810 if_token.control = SVGA3DOPCOMPC_NE; 811 zero = scalar(zero, TGSI_SWIZZLE_X); 812 813 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) { 814 /* 815 * Max different constant registers readable per IFC instruction is 1. 
816 */ 817 SVGA3dShaderDestToken tmp = get_temp( emit ); 818 819 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) 820 return FALSE; 821 822 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X); 823 } 824 825 emit->dynamic_branching_level++; 826 827 return (emit_instruction( emit, if_token ) && 828 emit_src( emit, src0 ) && 829 emit_src( emit, zero ) ); 830 } 831 832 static boolean emit_endif(struct svga_shader_emitter *emit, 833 const struct tgsi_full_instruction *insn) 834 { 835 emit->dynamic_branching_level--; 836 837 return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF)); 838 } 839 840 static boolean emit_else(struct svga_shader_emitter *emit, 841 const struct tgsi_full_instruction *insn) 842 { 843 return emit_instruction(emit, inst_token(SVGA3DOP_ELSE)); 844 } 845 846 /* Translate the following TGSI FLR instruction. 847 * FLR DST, SRC 848 * To the following SVGA3D instruction sequence. 849 * FRC TMP, SRC 850 * SUB DST, SRC, TMP 851 */ 852 static boolean emit_floor(struct svga_shader_emitter *emit, 853 const struct tgsi_full_instruction *insn ) 854 { 855 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 856 const struct src_register src0 = translate_src_register( 857 emit, &insn->Src[0] ); 858 SVGA3dShaderDestToken temp = get_temp( emit ); 859 860 /* FRC TMP, SRC */ 861 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 )) 862 return FALSE; 863 864 /* SUB DST, SRC, TMP */ 865 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0, 866 negate( src( temp ) ) )) 867 return FALSE; 868 869 return TRUE; 870 } 871 872 873 /* Translate the following TGSI CEIL instruction. 874 * CEIL DST, SRC 875 * To the following SVGA3D instruction sequence. 
876 * FRC TMP, -SRC 877 * ADD DST, SRC, TMP 878 */ 879 static boolean emit_ceil(struct svga_shader_emitter *emit, 880 const struct tgsi_full_instruction *insn) 881 { 882 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); 883 const struct src_register src0 = translate_src_register(emit, &insn->Src[0]); 884 SVGA3dShaderDestToken temp = get_temp(emit); 885 886 /* FRC TMP, -SRC */ 887 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0))) 888 return FALSE; 889 890 /* ADD DST, SRC, TMP */ 891 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp))) 892 return FALSE; 893 894 return TRUE; 895 } 896 897 898 /* Translate the following TGSI DIV instruction. 899 * DIV DST.xy, SRC0, SRC1 900 * To the following SVGA3D instruction sequence. 901 * RCP TMP.x, SRC1.xxxx 902 * RCP TMP.y, SRC1.yyyy 903 * MUL DST.xy, SRC0, TMP 904 */ 905 static boolean emit_div(struct svga_shader_emitter *emit, 906 const struct tgsi_full_instruction *insn ) 907 { 908 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 909 const struct src_register src0 = translate_src_register( 910 emit, &insn->Src[0] ); 911 const struct src_register src1 = translate_src_register( 912 emit, &insn->Src[1] ); 913 SVGA3dShaderDestToken temp = get_temp( emit ); 914 int i; 915 916 /* For each enabled element, perform a RCP instruction. Note that 917 * RCP is scalar in SVGA3D: 918 */ 919 for (i = 0; i < 4; i++) { 920 unsigned channel = 1 << i; 921 if (dst.mask & channel) { 922 /* RCP TMP.?, SRC1.???? */ 923 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), 924 writemask(temp, channel), 925 scalar(src1, i) )) 926 return FALSE; 927 } 928 } 929 930 /* Vector mul: 931 * MUL DST, SRC0, TMP 932 */ 933 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0, 934 src( temp ) )) 935 return FALSE; 936 937 return TRUE; 938 } 939 940 /* Translate the following TGSI DP2 instruction. 941 * DP2 DST, SRC1, SRC2 942 * To the following SVGA3D instruction sequence. 
943 * MUL TMP, SRC1, SRC2 944 * ADD DST, TMP.xxxx, TMP.yyyy 945 */ 946 static boolean emit_dp2(struct svga_shader_emitter *emit, 947 const struct tgsi_full_instruction *insn ) 948 { 949 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 950 const struct src_register src0 = translate_src_register( 951 emit, &insn->Src[0] ); 952 const struct src_register src1 = translate_src_register( 953 emit, &insn->Src[1] ); 954 SVGA3dShaderDestToken temp = get_temp( emit ); 955 struct src_register temp_src0, temp_src1; 956 957 /* MUL TMP, SRC1, SRC2 */ 958 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 )) 959 return FALSE; 960 961 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X); 962 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y); 963 964 /* ADD DST, TMP.xxxx, TMP.yyyy */ 965 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, 966 temp_src0, temp_src1 )) 967 return FALSE; 968 969 return TRUE; 970 } 971 972 973 /* Translate the following TGSI DPH instruction. 974 * DPH DST, SRC1, SRC2 975 * To the following SVGA3D instruction sequence. 976 * DP3 TMP, SRC1, SRC2 977 * ADD DST, TMP, SRC2.wwww 978 */ 979 static boolean emit_dph(struct svga_shader_emitter *emit, 980 const struct tgsi_full_instruction *insn ) 981 { 982 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 983 const struct src_register src0 = translate_src_register( 984 emit, &insn->Src[0] ); 985 struct src_register src1 = translate_src_register( 986 emit, &insn->Src[1] ); 987 SVGA3dShaderDestToken temp = get_temp( emit ); 988 989 /* DP3 TMP, SRC1, SRC2 */ 990 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 )) 991 return FALSE; 992 993 src1 = scalar(src1, TGSI_SWIZZLE_W); 994 995 /* ADD DST, TMP, SRC2.wwww */ 996 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, 997 src( temp ), src1 )) 998 return FALSE; 999 1000 return TRUE; 1001 } 1002 1003 /* Translate the following TGSI DST instruction. 
1004 * NRM DST, SRC 1005 * To the following SVGA3D instruction sequence. 1006 * DP3 TMP, SRC, SRC 1007 * RSQ TMP, TMP 1008 * MUL DST, SRC, TMP 1009 */ 1010 static boolean emit_nrm(struct svga_shader_emitter *emit, 1011 const struct tgsi_full_instruction *insn ) 1012 { 1013 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1014 const struct src_register src0 = translate_src_register( 1015 emit, &insn->Src[0] ); 1016 SVGA3dShaderDestToken temp = get_temp( emit ); 1017 1018 /* DP3 TMP, SRC, SRC */ 1019 if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 )) 1020 return FALSE; 1021 1022 /* RSQ TMP, TMP */ 1023 if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp ))) 1024 return FALSE; 1025 1026 /* MUL DST, SRC, TMP */ 1027 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, 1028 src0, src( temp ))) 1029 return FALSE; 1030 1031 return TRUE; 1032 1033 } 1034 1035 static boolean do_emit_sincos(struct svga_shader_emitter *emit, 1036 SVGA3dShaderDestToken dst, 1037 struct src_register src0) 1038 { 1039 src0 = scalar(src0, TGSI_SWIZZLE_X); 1040 return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0); 1041 } 1042 1043 static boolean emit_sincos(struct svga_shader_emitter *emit, 1044 const struct tgsi_full_instruction *insn) 1045 { 1046 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1047 struct src_register src0 = translate_src_register( 1048 emit, &insn->Src[0] ); 1049 SVGA3dShaderDestToken temp = get_temp( emit ); 1050 1051 /* SCS TMP SRC */ 1052 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 )) 1053 return FALSE; 1054 1055 /* MOV DST TMP */ 1056 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) )) 1057 return FALSE; 1058 1059 return TRUE; 1060 } 1061 1062 /* 1063 * SCS TMP SRC 1064 * MOV DST TMP.yyyy 1065 */ 1066 static boolean emit_sin(struct svga_shader_emitter *emit, 1067 const struct tgsi_full_instruction *insn ) 1068 { 1069 SVGA3dShaderDestToken dst = 
translate_dst_register( emit, insn, 0 ); 1070 struct src_register src0 = translate_src_register( 1071 emit, &insn->Src[0] ); 1072 SVGA3dShaderDestToken temp = get_temp( emit ); 1073 1074 /* SCS TMP SRC */ 1075 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0)) 1076 return FALSE; 1077 1078 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y); 1079 1080 /* MOV DST TMP.yyyy */ 1081 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) 1082 return FALSE; 1083 1084 return TRUE; 1085 } 1086 1087 /* 1088 * SCS TMP SRC 1089 * MOV DST TMP.xxxx 1090 */ 1091 static boolean emit_cos(struct svga_shader_emitter *emit, 1092 const struct tgsi_full_instruction *insn ) 1093 { 1094 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1095 struct src_register src0 = translate_src_register( 1096 emit, &insn->Src[0] ); 1097 SVGA3dShaderDestToken temp = get_temp( emit ); 1098 1099 /* SCS TMP SRC */ 1100 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 )) 1101 return FALSE; 1102 1103 src0 = scalar(src( temp ), TGSI_SWIZZLE_X); 1104 1105 /* MOV DST TMP.xxxx */ 1106 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) 1107 return FALSE; 1108 1109 return TRUE; 1110 } 1111 1112 static boolean emit_ssg(struct svga_shader_emitter *emit, 1113 const struct tgsi_full_instruction *insn ) 1114 { 1115 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1116 struct src_register src0 = translate_src_register( 1117 emit, &insn->Src[0] ); 1118 SVGA3dShaderDestToken temp0 = get_temp( emit ); 1119 SVGA3dShaderDestToken temp1 = get_temp( emit ); 1120 struct src_register zero, one; 1121 1122 if (emit->unit == PIPE_SHADER_VERTEX) { 1123 /* SGN DST, SRC0, TMP0, TMP1 */ 1124 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0, 1125 src( temp0 ), src( temp1 ) ); 1126 } 1127 1128 zero = get_zero_immediate( emit ); 1129 one = scalar( zero, TGSI_SWIZZLE_W ); 1130 zero = scalar( zero, TGSI_SWIZZLE_X ); 1131 1132 /* CMP TMP0, 
SRC0, one, zero */
   if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
                    writemask( temp0, dst.mask ), src0, one, zero ))
      return FALSE;

   /* CMP TMP1, negate(SRC0), negate(one), zero */
   if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ),
                    writemask( temp1, dst.mask ), negate( src0 ), negate( one ),
                    zero ))
      return FALSE;

   /* ADD DST, TMP0, TMP1 */
   return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ),
                      src( temp1 ) );
}

/*
 * Subtraction is emitted as an addition with the second operand negated:
 * ADD DST SRC0, negate(SRC1)
 */
static boolean emit_sub(struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   struct src_register src0 = translate_src_register(
      emit, &insn->Src[0] );
   struct src_register src1 = translate_src_register(
      emit, &insn->Src[1] );

   src1 = negate(src1);

   if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
                    src0, src1 ))
      return FALSE;

   return TRUE;
}


/**
 * Translate TGSI KIL into one or two TEXKILL instructions.
 *
 * The first TEXKILL covers the xyz components.  When the source's W
 * swizzle does not also appear in its X, Y or Z swizzle position, a
 * second TEXKILL is emitted on a temporary loaded with src.wwww.
 * Sources that carry abs/negate, indirection, a non-identity xyz
 * swizzle, or that are not in the temporary file are first copied into
 * a temporary.
 */
static boolean emit_kil(struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn )
{
   const struct tgsi_full_src_register *reg = &insn->Src[0];
   struct src_register src0, srcIn;
   /* is the W component tested in another position?
*/ 1176 const boolean w_tested = (reg->Register.SwizzleW == reg->Register.SwizzleX || 1177 reg->Register.SwizzleW == reg->Register.SwizzleY || 1178 reg->Register.SwizzleW == reg->Register.SwizzleZ); 1179 const boolean special = (reg->Register.Absolute || 1180 reg->Register.Negate || 1181 reg->Register.Indirect || 1182 reg->Register.SwizzleX != 0 || 1183 reg->Register.SwizzleY != 1 || 1184 reg->Register.SwizzleZ != 2 || 1185 reg->Register.File != TGSI_FILE_TEMPORARY); 1186 SVGA3dShaderDestToken temp; 1187 1188 src0 = srcIn = translate_src_register( emit, reg ); 1189 1190 if (special || !w_tested) { 1191 /* need a temp reg */ 1192 temp = get_temp( emit ); 1193 } 1194 1195 if (special) { 1196 /* move the source into a temp register */ 1197 submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1198 writemask( temp, TGSI_WRITEMASK_XYZ ), 1199 src0 ); 1200 1201 src0 = src( temp ); 1202 } 1203 1204 /* do the texkill (on the xyz components) */ 1205 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) )) 1206 return FALSE; 1207 1208 if (!w_tested) { 1209 /* need to emit a second texkill to test the W component */ 1210 /* put src.wwww into temp register */ 1211 if (!submit_op1(emit, 1212 inst_token( SVGA3DOP_MOV ), 1213 writemask( temp, TGSI_WRITEMASK_XYZ ), 1214 scalar(srcIn, TGSI_SWIZZLE_W))) 1215 return FALSE; 1216 1217 /* second texkill */ 1218 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), temp )) 1219 return FALSE; 1220 } 1221 1222 return TRUE; 1223 } 1224 1225 1226 /* mesa state tracker always emits kilp as an unconditional 1227 * kil */ 1228 static boolean emit_kilp(struct svga_shader_emitter *emit, 1229 const struct tgsi_full_instruction *insn ) 1230 { 1231 SVGA3dShaderInstToken inst; 1232 SVGA3dShaderDestToken temp; 1233 struct src_register one = scalar( get_zero_immediate( emit ), 1234 TGSI_SWIZZLE_W ); 1235 1236 inst = inst_token( SVGA3DOP_TEXKILL ); 1237 1238 /* texkill doesn't allow negation on the operand so lets move 1239 * negation of {1} to a 
temp register */
   temp = get_temp( emit );
   if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
                    negate( one ) ))
      return FALSE;

   return submit_op0( emit, inst, temp );
}


/**
 * Test if r1 and r2 are the same register.
 *
 * Only the register number and the two type fields of the base token
 * are compared; swizzle and source modifiers are not looked at.
 */
static boolean
same_register(struct src_register r1, struct src_register r2)
{
   return (r1.base.num == r2.base.num &&
           r1.base.type_upper == r2.base.type_upper &&
           r1.base.type_lower == r2.base.type_lower);
}



/* Implement conditionals by initializing destination reg to 'fail',
 * then set predicate reg with UFOP_SETP, then move 'pass' to dest
 * based on predicate reg.
 *
 * SETP src0, cmp, src1  -- do this first to avoid aliasing problems.
 * MOV dst, fail
 * MOV dst, pass, p0
 *
 * PIPE_FUNC_NEVER and PIPE_FUNC_ALWAYS need no predicate: they reduce
 * to a single MOV of 'fail' or 'pass' respectively.
 */
static boolean
emit_conditional(struct svga_shader_emitter *emit,
                 unsigned compare_func,
                 SVGA3dShaderDestToken dst,
                 struct src_register src0,
                 struct src_register src1,
                 struct src_register pass,
                 struct src_register fail)
{
   SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
   SVGA3dShaderInstToken setp_token, mov_token;
   setp_token = inst_token( SVGA3DOP_SETP );

   /* Map the gallium compare function onto the SETP comparison control. */
   switch (compare_func) {
   case PIPE_FUNC_NEVER:
      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                         dst, fail );
      break;
   case PIPE_FUNC_LESS:
      setp_token.control = SVGA3DOPCOMP_LT;
      break;
   case PIPE_FUNC_EQUAL:
      setp_token.control = SVGA3DOPCOMP_EQ;
      break;
   case PIPE_FUNC_LEQUAL:
      setp_token.control = SVGA3DOPCOMP_LE;
      break;
   case PIPE_FUNC_GREATER:
      setp_token.control = SVGA3DOPCOMP_GT;
      break;
   case PIPE_FUNC_NOTEQUAL:
      setp_token.control = SVGA3DOPCOMPC_NE;
      break;
   case PIPE_FUNC_GEQUAL:
      setp_token.control = SVGA3DOPCOMP_GE;
      break;
   case PIPE_FUNC_ALWAYS:
      return submit_op1( emit,
inst_token( SVGA3DOP_MOV ),
                         dst, pass );
      break;
   }

   if (same_register(src(dst), pass)) {
      /* We'll get bad results if the dst and pass registers are the same
       * so use a temp register containing pass.
       * (The unpredicated "MOV dst, fail" below would otherwise overwrite
       * 'pass' before the predicated MOV reads it.)
       */
      SVGA3dShaderDestToken temp = get_temp(emit);
      if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass))
         return FALSE;
      pass = src(temp);
   }

   /* SETP src0, COMPOP, src1 */
   if (!submit_op2( emit, setp_token, pred_reg,
                    src0, src1 ))
      return FALSE;

   mov_token = inst_token( SVGA3DOP_MOV );

   /* MOV dst, fail */
   if (!submit_op1( emit, mov_token, dst,
                    fail ))
      return FALSE;

   /* MOV dst, pass (predicated)
    *
    * Note that the predicate reg (and possible modifiers) is passed
    * as the first source argument.
    */
   mov_token.predicated = 1;
   if (!submit_op2( emit, mov_token, dst,
                    src( pred_reg ), pass ))
      return FALSE;

   return TRUE;
}


/**
 * Emit a comparison of src0 against src1 under compare_func.
 *
 * In vertex shaders GEQUAL, LEQUAL, GREATER and LESS map directly onto
 * SGE/SLT (swapping the operands where needed).  Every other case goes
 * through emit_conditional() with the 'one' and 'zero' immediates as
 * the pass and fail values.
 */
static boolean
emit_select(struct svga_shader_emitter *emit,
            unsigned compare_func,
            SVGA3dShaderDestToken dst,
            struct src_register src0,
            struct src_register src1 )
{
   /* There are some SVGA instructions which implement some selects
    * directly, but they are only available in the vertex shader.
*/
   if (emit->unit == PIPE_SHADER_VERTEX) {
      switch (compare_func) {
      case PIPE_FUNC_GEQUAL:
         return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
      case PIPE_FUNC_LEQUAL:
         /* a <= b is emitted as b >= a */
         return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
      case PIPE_FUNC_GREATER:
         /* a > b is emitted as b < a */
         return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
      case PIPE_FUNC_LESS:
         return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
      default:
         /* no direct instruction: fall through to the setp path below */
         break;
      }
   }


   /* Otherwise, need to use the setp approach:
    */
   {
      struct src_register one, zero;
      /* zero immediate is 0,0,0,1 */
      zero = get_zero_immediate( emit );
      one = scalar( zero, TGSI_SWIZZLE_W );
      zero = scalar( zero, TGSI_SWIZZLE_X );

      return emit_conditional(
         emit,
         compare_func,
         dst,
         src0,
         src1,
         one, zero);
   }
}


/**
 * Translate a two-source TGSI comparison instruction: translate the
 * destination and both sources, then defer to emit_select() with the
 * given compare function.
 */
static boolean emit_select_op(struct svga_shader_emitter *emit,
                              unsigned compare,
                              const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   struct src_register src0 = translate_src_register(
      emit, &insn->Src[0] );
   struct src_register src1 = translate_src_register(
      emit, &insn->Src[1] );

   return emit_select( emit, compare, dst, src0, src1 );
}


/**
 * Translate TGSI CMP instruction.
1410 */ 1411 static boolean 1412 emit_cmp(struct svga_shader_emitter *emit, 1413 const struct tgsi_full_instruction *insn) 1414 { 1415 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1416 const struct src_register src0 = 1417 translate_src_register(emit, &insn->Src[0] ); 1418 const struct src_register src1 = 1419 translate_src_register(emit, &insn->Src[1] ); 1420 const struct src_register src2 = 1421 translate_src_register(emit, &insn->Src[2] ); 1422 1423 if (emit->unit == PIPE_SHADER_VERTEX) { 1424 struct src_register zero = 1425 scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X); 1426 /* We used to simulate CMP with SLT+LRP. But that didn't work when 1427 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed 1428 * because it involves a CMP to handle the 0 case. 1429 * Use a conditional expression instead. 1430 */ 1431 return emit_conditional(emit, PIPE_FUNC_LESS, dst, 1432 src0, zero, src1, src2); 1433 } 1434 else { 1435 assert(emit->unit == PIPE_SHADER_FRAGMENT); 1436 1437 /* CMP DST, SRC0, SRC2, SRC1 */ 1438 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, 1439 src0, src2, src1); 1440 } 1441 } 1442 1443 1444 /* Translate texture instructions to SVGA3D representation. 
*/
/* emit_tex2() handles the texture opcodes that take a texcoord and a
 * sampler: TEX, TXP, TXB and TXL.  The fetched value is written to
 * 'dst', which the caller chooses.
 */
static boolean emit_tex2(struct svga_shader_emitter *emit,
                         const struct tgsi_full_instruction *insn,
                         SVGA3dShaderDestToken dst )
{
   SVGA3dShaderInstToken inst;
   struct src_register texcoord;
   struct src_register sampler;
   /* Only assigned when one of the texcoord rewrites below may need it. */
   SVGA3dShaderDestToken tmp;

   inst.value = 0;

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      inst.op = SVGA3DOP_TEX;
      break;
   case TGSI_OPCODE_TXP:
      inst.op = SVGA3DOP_TEX;
      inst.control = SVGA3DOPCONT_PROJECT;
      break;
   case TGSI_OPCODE_TXB:
      inst.op = SVGA3DOP_TEX;
      inst.control = SVGA3DOPCONT_BIAS;
      break;
   case TGSI_OPCODE_TXL:
      inst.op = SVGA3DOP_TEXLDL;
      break;
   default:
      assert(0);
      return FALSE;
   }

   texcoord = translate_src_register( emit, &insn->Src[0] );
   sampler = translate_src_register( emit, &insn->Src[1] );

   if (emit->key.fkey.tex[sampler.base.num].unnormalized ||
       emit->dynamic_branching_level > 0)
      tmp = get_temp( emit );

   /* Can't do mipmapping inside dynamic branch constructs.  Force LOD
    * zero in that case.
*/
   if (emit->dynamic_branching_level > 0 &&
       inst.op == SVGA3DOP_TEX &&
       SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) {
      struct src_register zero = get_zero_immediate( emit );

      /* MOV  tmp, texcoord */
      if (!submit_op1( emit,
                       inst_token( SVGA3DOP_MOV ),
                       tmp,
                       texcoord ))
         return FALSE;

      /* MOV  tmp.w, zero */
      if (!submit_op1( emit,
                       inst_token( SVGA3DOP_MOV ),
                       writemask( tmp, TGSI_WRITEMASK_W ),
                       scalar( zero, TGSI_SWIZZLE_X )))
         return FALSE;

      /* Sample from the copy with w forced to zero, using TEXLDL. */
      texcoord = src( tmp );
      inst.op = SVGA3DOP_TEXLDL;
   }

   /* Explicit normalization of texcoords:
    */
   if (emit->key.fkey.tex[sampler.base.num].unnormalized) {
      struct src_register wh = get_tex_dimensions( emit, sampler.base.num );

      /* MUL  tmp, SRC0, WH */
      if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
                       tmp, texcoord, wh ))
         return FALSE;

      texcoord = src( tmp );
   }

   return submit_op2( emit, inst, dst, texcoord, sampler );
}




/* Translate texture instructions to SVGA3D representation.
 *
 * emit_tex4() handles the four-source form (TXD): texcoord, ddx, ddy
 * and sampler are read from Src[0..3] and submitted in the order
 * texcoord, sampler, ddx, ddy.
 */
static boolean emit_tex4(struct svga_shader_emitter *emit,
                         const struct tgsi_full_instruction *insn,
                         SVGA3dShaderDestToken dst )
{
   SVGA3dShaderInstToken inst;
   struct src_register texcoord;
   struct src_register ddx;
   struct src_register ddy;
   struct src_register sampler;

   texcoord = translate_src_register( emit, &insn->Src[0] );
   ddx      = translate_src_register( emit, &insn->Src[1] );
   ddy      = translate_src_register( emit, &insn->Src[2] );
   sampler  = translate_src_register( emit, &insn->Src[3] );

   inst.value = 0;

   switch (insn->Instruction.Opcode) {
   case TGSI_OPCODE_TXD:
      inst.op = SVGA3DOP_TEXLDD; /* 4 args!
*/
      break;
   default:
      assert(0);
      return FALSE;
   }

   return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy );
}


/**
 * Emit texture swizzle code.
 *
 * Each of the four swizzle selectors is sorted into one of three
 * groups: PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE, or "copy from src".
 * Up to three MOVs are then emitted, one per group, and each only when
 * dst.mask overlaps that group's writemask:
 *   - copied channels come from src with the per-channel swizzle,
 *   - zero channels come from the X channel of the zero immediate,
 *   - one channels come from the W channel of the zero immediate.
 *
 * \return FALSE if any MOV fails to submit, TRUE otherwise
 */
static boolean emit_tex_swizzle( struct svga_shader_emitter *emit,
                                 SVGA3dShaderDestToken dst,
                                 struct src_register src,
                                 unsigned swizzle_x,
                                 unsigned swizzle_y,
                                 unsigned swizzle_z,
                                 unsigned swizzle_w)
{
   const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w};
   unsigned srcSwizzle[4];
   unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0;
   int i;

   /* build writemasks and srcSwizzle terms */
   for (i = 0; i < 4; i++) {
      if (swizzleIn[i] == PIPE_SWIZZLE_ZERO) {
         /* placeholder (identity) swizzle; this channel is not in srcWritemask */
         srcSwizzle[i] = TGSI_SWIZZLE_X + i;
         zeroWritemask |= (1 << i);
      }
      else if (swizzleIn[i] == PIPE_SWIZZLE_ONE) {
         /* placeholder (identity) swizzle; this channel is not in srcWritemask */
         srcSwizzle[i] = TGSI_SWIZZLE_X + i;
         oneWritemask |= (1 << i);
      }
      else {
         srcSwizzle[i] = swizzleIn[i];
         srcWritemask |= (1 << i);
      }
   }

   /* write x/y/z/w comps */
   if (dst.mask & srcWritemask) {
      if (!submit_op1(emit,
                      inst_token(SVGA3DOP_MOV),
                      writemask(dst, srcWritemask),
                      swizzle(src,
                              srcSwizzle[0],
                              srcSwizzle[1],
                              srcSwizzle[2],
                              srcSwizzle[3])))
         return FALSE;
   }

   /* write 0 comps */
   if (dst.mask & zeroWritemask) {
      if (!submit_op1(emit,
                      inst_token(SVGA3DOP_MOV),
                      writemask(dst, zeroWritemask),
                      scalar(get_zero_immediate(emit), TGSI_SWIZZLE_X)))
         return FALSE;
   }

   /* write 1 comps */
   if (dst.mask & oneWritemask) {
      if (!submit_op1(emit,
                      inst_token(SVGA3DOP_MOV),
                      writemask(dst, oneWritemask),
                      scalar(get_zero_immediate(emit), TGSI_SWIZZLE_W)))
         return FALSE;
   }

   return TRUE;
}


/**
 * Translate a TGSI texture instruction (TEX, TXB, TXP, TXL or TXD).
 *
 * The fetch itself is delegated to emit_tex2() or emit_tex4().  When
 * the sampler is a shadow sampler, has a non-identity swizzle, or the
 * instruction saturates, the fetch goes to a temporary first and the
 * extra processing is emitted afterwards.
 */
static boolean emit_tex(struct svga_shader_emitter *emit,
                        const struct
tgsi_full_instruction *insn )
{
   SVGA3dShaderDestToken dst =
      translate_dst_register( emit, insn, 0 );
   struct src_register src0 =
      translate_src_register( emit, &insn->Src[0] );
   struct src_register src1 =
      translate_src_register( emit, &insn->Src[1] );

   SVGA3dShaderDestToken tex_result;
   /* The second source's register number is used as the index into
    * emit->key.fkey.tex[].
    */
   const unsigned unit = src1.base.num;

   /* check for shadow samplers */
   boolean compare = (emit->key.fkey.tex[unit].compare_mode ==
                      PIPE_TEX_COMPARE_R_TO_TEXTURE);

   /* texture swizzle */
   boolean swizzle = (emit->key.fkey.tex[unit].swizzle_r != PIPE_SWIZZLE_RED ||
                      emit->key.fkey.tex[unit].swizzle_g != PIPE_SWIZZLE_GREEN ||
                      emit->key.fkey.tex[unit].swizzle_b != PIPE_SWIZZLE_BLUE ||
                      emit->key.fkey.tex[unit].swizzle_a != PIPE_SWIZZLE_ALPHA);

   boolean saturate = insn->Instruction.Saturate != TGSI_SAT_NONE;

   /* If doing compare processing or tex swizzle or saturation, we need to put
    * the fetched color into a temporary so it can be used as a source later on.
    */
   if (compare || swizzle || saturate) {
      tex_result = get_temp( emit );
   }
   else {
      tex_result = dst;
   }

   switch(insn->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXL:
      if (!emit_tex2( emit, insn, tex_result ))
         return FALSE;
      break;
   case TGSI_OPCODE_TXD:
      if (!emit_tex4( emit, insn, tex_result ))
         return FALSE;
      break;
   default:
      /* Not a texture opcode: fail like emit_tex2()/emit_tex4() do rather
       * than post-processing a result that was never fetched when
       * asserts are compiled out.
       */
      assert(0);
      return FALSE;
   }

   if (compare) {
      /* Where the comparison result lands: the temporary if a swizzle or
       * saturate pass still follows, otherwise the real destination.
       */
      SVGA3dShaderDestToken dst2;

      if (swizzle || saturate)
         dst2 = tex_result;
      else
         dst2 = dst;

      if (dst.mask & TGSI_WRITEMASK_XYZ) {
         SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
         /* When sampling a depth texture, the result of the comparison is in
          * the Y component.
1691 */ 1692 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y); 1693 struct src_register r_coord; 1694 1695 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) { 1696 /* Divide texcoord R by Q */ 1697 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), 1698 writemask(src0_zdivw, TGSI_WRITEMASK_X), 1699 scalar(src0, TGSI_SWIZZLE_W) )) 1700 return FALSE; 1701 1702 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 1703 writemask(src0_zdivw, TGSI_WRITEMASK_X), 1704 scalar(src0, TGSI_SWIZZLE_Z), 1705 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) )) 1706 return FALSE; 1707 1708 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X); 1709 } 1710 else { 1711 r_coord = scalar(src0, TGSI_SWIZZLE_Z); 1712 } 1713 1714 /* Compare texture sample value against R component of texcoord */ 1715 if (!emit_select(emit, 1716 emit->key.fkey.tex[unit].compare_func, 1717 writemask( dst2, TGSI_WRITEMASK_XYZ ), 1718 r_coord, 1719 tex_src_x)) 1720 return FALSE; 1721 } 1722 1723 if (dst.mask & TGSI_WRITEMASK_W) { 1724 struct src_register one = 1725 scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W ); 1726 1727 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1728 writemask( dst2, TGSI_WRITEMASK_W ), 1729 one )) 1730 return FALSE; 1731 } 1732 } 1733 1734 if (saturate && !swizzle) { 1735 /* MOV_SAT real_dst, dst */ 1736 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) )) 1737 return FALSE; 1738 } 1739 else if (swizzle) { 1740 /* swizzle from tex_result to dst (handles saturation too, if any) */ 1741 emit_tex_swizzle(emit, 1742 dst, src(tex_result), 1743 emit->key.fkey.tex[unit].swizzle_r, 1744 emit->key.fkey.tex[unit].swizzle_g, 1745 emit->key.fkey.tex[unit].swizzle_b, 1746 emit->key.fkey.tex[unit].swizzle_a); 1747 } 1748 1749 return TRUE; 1750 } 1751 1752 static boolean emit_bgnloop2( struct svga_shader_emitter *emit, 1753 const struct tgsi_full_instruction *insn ) 1754 { 1755 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP ); 1756 struct src_register 
loop_reg = src_register( SVGA3DREG_LOOP, 0 );
   struct src_register const_int = get_loop_const( emit );

   /* The level is raised before the tokens are emitted, whether or not
    * the emission below succeeds.
    */
   emit->dynamic_branching_level++;

   return (emit_instruction( emit, inst ) &&
           emit_src( emit, loop_reg ) &&
           emit_src( emit, const_int ) );
}

/**
 * Close a LOOP construct: lowers the dynamic branching nesting level
 * and emits ENDLOOP.  'insn' is not used.
 */
static boolean emit_endloop2( struct svga_shader_emitter *emit,
                              const struct tgsi_full_instruction *insn )
{
   SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );

   emit->dynamic_branching_level--;

   return emit_instruction( emit, inst );
}

/**
 * Emit a BREAK instruction.  'insn' is not used.
 */
static boolean emit_brk( struct svga_shader_emitter *emit,
                         const struct tgsi_full_instruction *insn )
{
   SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
   return emit_instruction( emit, inst );
}

/**
 * Emit a one-source instruction whose source is first passed through
 * scalar(src, TGSI_SWIZZLE_X).
 *
 * \param opcode  SVGA3D opcode to emit
 */
static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
                                unsigned opcode,
                                const struct tgsi_full_instruction *insn )
{
   SVGA3dShaderInstToken inst;
   SVGA3dShaderDestToken dst;
   struct src_register src;

   inst = inst_token( opcode );
   dst = translate_dst_register( emit, insn, 0 );
   src = translate_src_register( emit, &insn->Src[0] );
   src = scalar( src, TGSI_SWIZZLE_X );

   return submit_op1( emit, inst, dst, src );
}


/**
 * Emit a TGSI instruction that maps one-to-one onto an SVGA3D opcode.
 *
 * The destination and the 0..3 source registers are translated and
 * submitted unchanged.  Any other source count asserts and returns
 * FALSE.
 *
 * \param opcode  SVGA3D opcode to emit
 */
static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
                                       unsigned opcode,
                                       const struct tgsi_full_instruction *insn )
{
   const struct tgsi_full_src_register *src = insn->Src;
   SVGA3dShaderInstToken inst;
   SVGA3dShaderDestToken dst;

   inst = inst_token( opcode );
   dst = translate_dst_register( emit, insn, 0 );

   switch (insn->Instruction.NumSrcRegs) {
   case 0:
      return submit_op0( emit, inst, dst );
   case 1:
      return submit_op1( emit, inst, dst,
                         translate_src_register( emit, &src[0] ));
   case 2:
      return submit_op2( emit, inst, dst,
                         translate_src_register( emit, &src[0] ),
translate_src_register( emit, &src[1] ) );
   case 3:
      return submit_op3( emit, inst, dst,
                         translate_src_register( emit, &src[0] ),
                         translate_src_register( emit, &src[1] ),
                         translate_src_register( emit, &src[2] ) );
   default:
      assert(0);
      return FALSE;
   }
}


/**
 * Translate TGSI DDX / DDY to DSX / DSY.
 *
 * Inside dynamic branching with a temporary as source, the destination
 * is simply zeroed instead.  Otherwise the derivative is emitted; a
 * source carrying abs or negate is first passed to emit_repl() together
 * with a fresh temporary.  Any other opcode returns FALSE.
 */
static boolean emit_deriv(struct svga_shader_emitter *emit,
                          const struct tgsi_full_instruction *insn )
{
   if (emit->dynamic_branching_level > 0 &&
       insn->Src[0].Register.File == TGSI_FILE_TEMPORARY)
   {
      struct src_register zero = get_zero_immediate( emit );
      SVGA3dShaderDestToken dst =
         translate_dst_register( emit, insn, 0 );

      /* Deriv opcodes not valid inside dynamic branching, workaround
       * by zeroing out the destination.
       */
      if (!submit_op1(emit,
                      inst_token( SVGA3DOP_MOV ),
                      dst,
                      scalar(zero, TGSI_SWIZZLE_X)))
         return FALSE;

      return TRUE;
   }
   else {
      unsigned opcode;
      const struct tgsi_full_src_register *reg = &insn->Src[0];
      SVGA3dShaderInstToken inst;
      SVGA3dShaderDestToken dst;
      struct src_register src0;

      switch (insn->Instruction.Opcode) {
      case TGSI_OPCODE_DDX:
         opcode = SVGA3DOP_DSX;
         break;
      case TGSI_OPCODE_DDY:
         opcode = SVGA3DOP_DSY;
         break;
      default:
         return FALSE;
      }

      inst = inst_token( opcode );
      dst = translate_dst_register( emit, insn, 0 );
      src0 = translate_src_register( emit, reg );

      /* We cannot use negate or abs on source to dsx/dsy instruction.
*/
      if (reg->Register.Absolute ||
          reg->Register.Negate) {
         SVGA3dShaderDestToken temp = get_temp( emit );

         /* src0 is passed by address so emit_repl() can update it */
         if (!emit_repl( emit, temp, &src0 ))
            return FALSE;
      }

      return submit_op1( emit, inst, dst, src0 );
   }
}

/**
 * Translate TGSI ARL.
 *
 * emit->current_arl is incremented for every ARL seen, including the
 * ones that are dropped in fragment shaders.
 */
static boolean emit_arl(struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn)
{
   ++emit->current_arl;
   if (emit->unit == PIPE_SHADER_FRAGMENT) {
      /* MOVA not present in pixel shader instruction set.
       * Ignore this instruction altogether since it is
       * only used for loop counters -- and for that
       * we reference aL directly.
       */
      return TRUE;
   }
   if (svga_arl_needs_adjustment( emit )) {
      return emit_fake_arl( emit, insn );
   } else {
      /* no need to adjust, just emit straight arl */
      return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
   }
}

/**
 * Translate TGSI POW.
 *
 * Both sources are reduced to their X channel.  When the destination
 * is not a temporary, or aliases src1, POW is written to the X channel
 * of a fresh temporary and then MOVed to the destination.
 */
static boolean emit_pow(struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   struct src_register src0 = translate_src_register(
      emit, &insn->Src[0] );
   struct src_register src1 = translate_src_register(
      emit, &insn->Src[1] );
   boolean need_tmp = FALSE;

   /* POW can only output to a temporary */
   if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY)
      need_tmp = TRUE;

   /* POW src1 must not be the same register as dst */
   if (alias_src_dst( src1, dst ))
      need_tmp = TRUE;

   /* it's a scalar op */
   src0 = scalar( src0, TGSI_SWIZZLE_X );
   src1 = scalar( src1, TGSI_SWIZZLE_X );

   if (need_tmp) {
      SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );

      if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
         return FALSE;

      /* swizzle index 0 selects the X channel that was just written */
      return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
   }
else {
      return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
   }
}

/**
 * Translate TGSI XPD (cross product) to CRS.
 *
 * CRS is written to a temporary first when the destination is not a
 * temporary register or aliases either source; the temporary's
 * writemask is restricted to the destination's xyz channels.  If the
 * destination's W channel is enabled it is written separately after
 * the cross product.
 */
static boolean emit_xpd(struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   const struct src_register src0 = translate_src_register(
      emit, &insn->Src[0] );
   const struct src_register src1 = translate_src_register(
      emit, &insn->Src[1] );
   boolean need_dst_tmp = FALSE;

   /* XPD can only output to a temporary */
   if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP)
      need_dst_tmp = TRUE;

   /* The dst reg must not be the same as src0 or src1*/
   if (alias_src_dst(src0, dst) ||
       alias_src_dst(src1, dst))
      need_dst_tmp = TRUE;

   if (need_dst_tmp) {
      SVGA3dShaderDestToken tmp = get_temp( emit );

      /* Obey DX9 restrictions on mask:
       */
      tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;

      if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
         return FALSE;

      if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
         return FALSE;
   }
   else {
      if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
         return FALSE;
   }

   /* Need to emit 1.0 to dst.w?
*/
   if (dst.mask & TGSI_WRITEMASK_W) {
      /* NOTE(review): the immediate is passed without a scalar() swizzle
       * and only the W channel is written; elsewhere in this file the
       * zero immediate is described as 0,0,0,1, so this presumably
       * stores 1.0 -- confirm against get_zero_immediate().
       */
      struct src_register zero = get_zero_immediate( emit );

      if (!submit_op1(emit,
                      inst_token( SVGA3DOP_MOV ),
                      writemask(dst, TGSI_WRITEMASK_W),
                      zero))
         return FALSE;
   }

   return TRUE;
}


/**
 * Translate TGSI LRP: translate the destination and the three sources
 * and hand them to submit_lrp().
 */
static boolean emit_lrp(struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   const struct src_register src0 = translate_src_register(
      emit, &insn->Src[0] );
   const struct src_register src1 = translate_src_register(
      emit, &insn->Src[1] );
   const struct src_register src2 = translate_src_register(
      emit, &insn->Src[2] );

   return submit_lrp(emit, dst, src0, src1, src2);
}


/**
 * Translate TGSI DST.
 *
 * Vertex shaders use the native DST instruction.  Other shader units
 * build the result with MOV/MUL, working in a temporary when the
 * destination is not a temporary register or aliases a source.
 */
static boolean emit_dst_insn(struct svga_shader_emitter *emit,
                             const struct tgsi_full_instruction *insn )
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
       */
      return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
   }
   else {

      /* result[0] = 1    * 1;
       * result[1] = a[1] * b[1];
       * result[2] = a[2] * 1;
       * result[3] = 1    * b[3];
       */

      SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
      SVGA3dShaderDestToken tmp;
      const struct src_register src0 = translate_src_register(
         emit, &insn->Src[0] );
      const struct src_register src1 = translate_src_register(
         emit, &insn->Src[1] );
      struct src_register zero = get_zero_immediate( emit );
      boolean need_tmp = FALSE;

      if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
          alias_src_dst(src0, dst) ||
          alias_src_dst(src1, dst))
         need_tmp = TRUE;

      /* Build the result directly in dst when that is safe. */
      if (need_tmp) {
         tmp = get_temp( emit );
      }
      else {
         tmp = dst;
      }

      /* tmp.xw = 1.0
       */
      if (tmp.mask & TGSI_WRITEMASK_XW) {
         if
(!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          writemask(tmp, TGSI_WRITEMASK_XW ),
                          scalar( zero, 3 )))
            return FALSE;
      }

      /* tmp.yz = src0
       */
      if (tmp.mask & TGSI_WRITEMASK_YZ) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          writemask(tmp, TGSI_WRITEMASK_YZ ),
                          src0))
            return FALSE;
      }

      /* tmp.yw = tmp * src1
       */
      if (tmp.mask & TGSI_WRITEMASK_YW) {
         if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
                          writemask(tmp, TGSI_WRITEMASK_YW ),
                          src(tmp),
                          src1))
            return FALSE;
      }

      /* dst = tmp
       */
      if (need_tmp) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          dst,
                          src(tmp)))
            return FALSE;
      }
   }

   return TRUE;
}


/**
 * Translate TGSI EXP, one destination channel at a time:
 *   x = 2 ^ floor(src0)
 *   y = src0 - floor(src0)
 *   z = 2 ^ src0 (partial precision)
 *   w = 1
 * Only the channels enabled in the destination writemask are emitted.
 */
static boolean emit_exp(struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   struct src_register src0 =
      translate_src_register( emit, &insn->Src[0] );
   struct src_register zero = get_zero_immediate( emit );
   /* Register whose Y channel holds the fractional part: dst itself when
    * y is written, a temporary when only x needs it, unused otherwise.
    */
   SVGA3dShaderDestToken fraction;

   if (dst.mask & TGSI_WRITEMASK_Y)
      fraction = dst;
   else if (dst.mask & TGSI_WRITEMASK_X)
      fraction = get_temp( emit );
   else
      fraction.value = 0;

   /* If y is being written, fill it with src0 - floor(src0).
    * (Also needed when only x is written, since x is derived from it.)
    */
   if (dst.mask & TGSI_WRITEMASK_XY) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
                       writemask( fraction, TGSI_WRITEMASK_Y ),
                       src0 ))
         return FALSE;
   }

   /* If x is being written, fill it with 2 ^ floor(src0).
*/
   if (dst.mask & TGSI_WRITEMASK_X) {
      /* dst.x = src0 - fraction.y, i.e. floor(src0) */
      if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
                       writemask( dst, TGSI_WRITEMASK_X ),
                       src0,
                       scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
         return FALSE;

      /* dst.x = EXP(dst.x) */
      if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
                       writemask( dst, TGSI_WRITEMASK_X ),
                       scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
         return FALSE;

      /* fraction was a temporary only when y is not being written */
      if (!(dst.mask & TGSI_WRITEMASK_Y))
         release_temp( emit, fraction );
   }

   /* If z is being written, fill it with 2 ^ src0 (partial precision).
    */
   if (dst.mask & TGSI_WRITEMASK_Z) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
                       writemask( dst, TGSI_WRITEMASK_Z ),
                       src0 ) )
         return FALSE;
   }

   /* If w is being written, fill it with one.
    */
   if (dst.mask & TGSI_WRITEMASK_W) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                       writemask(dst, TGSI_WRITEMASK_W),
                       scalar( zero, TGSI_SWIZZLE_W ) ))
         return FALSE;
   }

   return TRUE;
}

/**
 * Translate TGSI LIT.
 *
 * Vertex shaders use the native LIT instruction.  Other shader units
 * expand it into POW/MOV plus a predicated MOV, as described in the
 * comment inside the function.
 */
static boolean emit_lit(struct svga_shader_emitter *emit,
                        const struct tgsi_full_instruction *insn )
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
       */
      return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
   }
   else {
      /* D3D vs. GL semantics can be fairly easily accommodated by
       * variations on this sequence.
       *
       * GL:
       *   tmp.y = src.x
       *   tmp.z = pow(src.y,src.w)
       *   p0 = src0.xxxx > 0
       *   result = zero.wxxw
       *   (p0) result.yz = tmp
       *
       * D3D:
       *   tmp.y = src.x
       *   tmp.z = pow(src.y,src.w)
       *   p0 = src0.xxyy > 0
       *   result = zero.wxxw
       *   (p0) result.yz = tmp
       *
       * Will implement the GL version for now.
*/
      SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
      SVGA3dShaderDestToken tmp = get_temp( emit );
      const struct src_register src0 = translate_src_register(
         emit, &insn->Src[0] );
      struct src_register zero = get_zero_immediate( emit );

      /* tmp = pow(src.y, src.w)
       * (swizzle indices 1 and 3 select the Y and W channels)
       */
      if (dst.mask & TGSI_WRITEMASK_Z) {
         if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
                         tmp,
                         scalar(src0, 1),
                         scalar(src0, 3)))
            return FALSE;
      }

      /* tmp.y = src.x
       */
      if (dst.mask & TGSI_WRITEMASK_Y) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          writemask(tmp, TGSI_WRITEMASK_Y ),
                          scalar(src0, 0)))
            return FALSE;
      }

      /* Can't quite do this with emit conditional due to the extra
       * writemask on the predicated mov:
       */
      {
         SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
         SVGA3dShaderInstToken setp_token, mov_token;
         struct src_register predsrc;

         setp_token = inst_token( SVGA3DOP_SETP );
         mov_token = inst_token( SVGA3DOP_MOV );

         setp_token.control = SVGA3DOPCOMP_GT;

         /* D3D vs GL semantics:
          * (the D3D branch is compiled but disabled by the constant 0)
          */
         if (0)
            predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
         else
            predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */

         /* SETP predsrc, GT, {0}.x
          * (predsrc is src0.xxxx for GL, src0.xxyy for D3D)
          */
         if (!submit_op2( emit, setp_token, pred_reg,
                          predsrc,
                          swizzle(zero, 0, 0, 0, 0) ))
            return FALSE;

         /* MOV dst, fail  (fail = zero.wxxw) */
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
                          swizzle(zero, 3, 0, 0, 3 )))
            return FALSE;

         /* MOV dst.yz, tmp (predicated)
          *
          * Note that the predicate reg (and possible modifiers) is passed
          * as the first source argument.
2244 */ 2245 if (dst.mask & TGSI_WRITEMASK_YZ) { 2246 mov_token.predicated = 1; 2247 if (!submit_op2( emit, mov_token, 2248 writemask(dst, TGSI_WRITEMASK_YZ), 2249 src( pred_reg ), src( tmp ) )) 2250 return FALSE; 2251 } 2252 } 2253 } 2254 2255 return TRUE; 2256 } 2257 2258 2259 static boolean emit_ex2( struct svga_shader_emitter *emit, 2260 const struct tgsi_full_instruction *insn ) 2261 { 2262 SVGA3dShaderInstToken inst; 2263 SVGA3dShaderDestToken dst; 2264 struct src_register src0; 2265 2266 inst = inst_token( SVGA3DOP_EXP ); 2267 dst = translate_dst_register( emit, insn, 0 ); 2268 src0 = translate_src_register( emit, &insn->Src[0] ); 2269 src0 = scalar( src0, TGSI_SWIZZLE_X ); 2270 2271 if (dst.mask != TGSI_WRITEMASK_XYZW) { 2272 SVGA3dShaderDestToken tmp = get_temp( emit ); 2273 2274 if (!submit_op1( emit, inst, tmp, src0 )) 2275 return FALSE; 2276 2277 return submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2278 dst, 2279 scalar( src( tmp ), TGSI_SWIZZLE_X ) ); 2280 } 2281 2282 return submit_op1( emit, inst, dst, src0 ); 2283 } 2284 2285 2286 static boolean emit_log(struct svga_shader_emitter *emit, 2287 const struct tgsi_full_instruction *insn) 2288 { 2289 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2290 struct src_register src0 = 2291 translate_src_register( emit, &insn->Src[0] ); 2292 struct src_register zero = get_zero_immediate( emit ); 2293 SVGA3dShaderDestToken abs_tmp; 2294 struct src_register abs_src0; 2295 SVGA3dShaderDestToken log2_abs; 2296 2297 abs_tmp.value = 0; 2298 2299 if (dst.mask & TGSI_WRITEMASK_Z) 2300 log2_abs = dst; 2301 else if (dst.mask & TGSI_WRITEMASK_XY) 2302 log2_abs = get_temp( emit ); 2303 else 2304 log2_abs.value = 0; 2305 2306 /* If z is being written, fill it with log2( abs( src0 ) ). 
2307 */ 2308 if (dst.mask & TGSI_WRITEMASK_XYZ) { 2309 if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS) 2310 abs_src0 = src0; 2311 else { 2312 abs_tmp = get_temp( emit ); 2313 2314 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2315 abs_tmp, 2316 src0 ) ) 2317 return FALSE; 2318 2319 abs_src0 = src( abs_tmp ); 2320 } 2321 2322 abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) ); 2323 2324 if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ), 2325 writemask( log2_abs, TGSI_WRITEMASK_Z ), 2326 abs_src0 ) ) 2327 return FALSE; 2328 } 2329 2330 if (dst.mask & TGSI_WRITEMASK_XY) { 2331 SVGA3dShaderDestToken floor_log2; 2332 2333 if (dst.mask & TGSI_WRITEMASK_X) 2334 floor_log2 = dst; 2335 else 2336 floor_log2 = get_temp( emit ); 2337 2338 /* If x is being written, fill it with floor( log2( abs( src0 ) ) ). 2339 */ 2340 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), 2341 writemask( floor_log2, TGSI_WRITEMASK_X ), 2342 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) ) 2343 return FALSE; 2344 2345 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), 2346 writemask( floor_log2, TGSI_WRITEMASK_X ), 2347 scalar( src( log2_abs ), TGSI_SWIZZLE_Z ), 2348 negate( src( floor_log2 ) ) ) ) 2349 return FALSE; 2350 2351 /* If y is being written, fill it with 2352 * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ). 
2353 */ 2354 if (dst.mask & TGSI_WRITEMASK_Y) { 2355 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ), 2356 writemask( dst, TGSI_WRITEMASK_Y ), 2357 negate( scalar( src( floor_log2 ), 2358 TGSI_SWIZZLE_X ) ) ) ) 2359 return FALSE; 2360 2361 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 2362 writemask( dst, TGSI_WRITEMASK_Y ), 2363 src( dst ), 2364 abs_src0 ) ) 2365 return FALSE; 2366 } 2367 2368 if (!(dst.mask & TGSI_WRITEMASK_X)) 2369 release_temp( emit, floor_log2 ); 2370 2371 if (!(dst.mask & TGSI_WRITEMASK_Z)) 2372 release_temp( emit, log2_abs ); 2373 } 2374 2375 if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod && 2376 src0.base.srcMod != SVGA3DSRCMOD_ABS) 2377 release_temp( emit, abs_tmp ); 2378 2379 /* If w is being written, fill it with one. 2380 */ 2381 if (dst.mask & TGSI_WRITEMASK_W) { 2382 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2383 writemask(dst, TGSI_WRITEMASK_W), 2384 scalar( zero, TGSI_SWIZZLE_W ) )) 2385 return FALSE; 2386 } 2387 2388 return TRUE; 2389 } 2390 2391 2392 /** 2393 * Translate TGSI TRUNC or ROUND instruction. 2394 * We need to truncate toward zero. Ex: trunc(-1.9) = -1 2395 * Different approaches are needed for VS versus PS. 
2396 */ 2397 static boolean 2398 emit_trunc_round(struct svga_shader_emitter *emit, 2399 const struct tgsi_full_instruction *insn, 2400 boolean round) 2401 { 2402 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); 2403 const struct src_register src0 = 2404 translate_src_register(emit, &insn->Src[0] ); 2405 SVGA3dShaderDestToken t1 = get_temp(emit); 2406 2407 if (round) { 2408 SVGA3dShaderDestToken t0 = get_temp(emit); 2409 struct src_register half = get_half_immediate(emit); 2410 2411 /* t0 = abs(src0) + 0.5 */ 2412 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0, 2413 absolute(src0), half)) 2414 return FALSE; 2415 2416 /* t1 = fract(t0) */ 2417 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0))) 2418 return FALSE; 2419 2420 /* t1 = t0 - t1 */ 2421 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0), 2422 negate(src(t1)))) 2423 return FALSE; 2424 } 2425 else { 2426 /* trunc */ 2427 2428 /* t1 = fract(abs(src0)) */ 2429 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0))) 2430 return FALSE; 2431 2432 /* t1 = abs(src0) - t1 */ 2433 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0), 2434 negate(src(t1)))) 2435 return FALSE; 2436 } 2437 2438 /* 2439 * Now we need to multiply t1 by the sign of the original value. 
2440 */ 2441 if (emit->unit == PIPE_SHADER_VERTEX) { 2442 /* For VS: use SGN instruction */ 2443 /* Need two extra/dummy registers: */ 2444 SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit), 2445 t4 = get_temp(emit); 2446 2447 /* t2 = sign(src0) */ 2448 if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0, 2449 src(t3), src(t4))) 2450 return FALSE; 2451 2452 /* dst = t1 * t2 */ 2453 if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2))) 2454 return FALSE; 2455 } 2456 else { 2457 /* For FS: Use CMP instruction */ 2458 return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst, 2459 src0, src(t1), negate(src(t1))); 2460 } 2461 2462 return TRUE; 2463 } 2464 2465 2466 static boolean emit_bgnsub( struct svga_shader_emitter *emit, 2467 unsigned position, 2468 const struct tgsi_full_instruction *insn ) 2469 { 2470 unsigned i; 2471 2472 /* Note that we've finished the main function and are now emitting 2473 * subroutines. This affects how we terminate the generated 2474 * shader. 
2475 */ 2476 emit->in_main_func = FALSE; 2477 2478 for (i = 0; i < emit->nr_labels; i++) { 2479 if (emit->label[i] == position) { 2480 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) && 2481 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) && 2482 emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); 2483 } 2484 } 2485 2486 assert(0); 2487 return TRUE; 2488 } 2489 2490 static boolean emit_call( struct svga_shader_emitter *emit, 2491 const struct tgsi_full_instruction *insn ) 2492 { 2493 unsigned position = insn->Label.Label; 2494 unsigned i; 2495 2496 for (i = 0; i < emit->nr_labels; i++) { 2497 if (emit->label[i] == position) 2498 break; 2499 } 2500 2501 if (emit->nr_labels == Elements(emit->label)) 2502 return FALSE; 2503 2504 if (i == emit->nr_labels) { 2505 emit->label[i] = position; 2506 emit->nr_labels++; 2507 } 2508 2509 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) && 2510 emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); 2511 } 2512 2513 2514 static boolean emit_end( struct svga_shader_emitter *emit ) 2515 { 2516 if (emit->unit == PIPE_SHADER_VERTEX) { 2517 return emit_vs_postamble( emit ); 2518 } 2519 else { 2520 return emit_ps_postamble( emit ); 2521 } 2522 } 2523 2524 2525 2526 static boolean svga_emit_instruction( struct svga_shader_emitter *emit, 2527 unsigned position, 2528 const struct tgsi_full_instruction *insn ) 2529 { 2530 switch (insn->Instruction.Opcode) { 2531 2532 case TGSI_OPCODE_ARL: 2533 return emit_arl( emit, insn ); 2534 2535 case TGSI_OPCODE_TEX: 2536 case TGSI_OPCODE_TXB: 2537 case TGSI_OPCODE_TXP: 2538 case TGSI_OPCODE_TXL: 2539 case TGSI_OPCODE_TXD: 2540 return emit_tex( emit, insn ); 2541 2542 case TGSI_OPCODE_DDX: 2543 case TGSI_OPCODE_DDY: 2544 return emit_deriv( emit, insn ); 2545 2546 case TGSI_OPCODE_BGNSUB: 2547 return emit_bgnsub( emit, position, insn ); 2548 2549 case TGSI_OPCODE_ENDSUB: 2550 return TRUE; 2551 2552 case TGSI_OPCODE_CAL: 2553 return emit_call( emit, insn ); 2554 2555 
case TGSI_OPCODE_FLR: 2556 return emit_floor( emit, insn ); 2557 2558 case TGSI_OPCODE_TRUNC: 2559 return emit_trunc_round( emit, insn, FALSE ); 2560 2561 case TGSI_OPCODE_ROUND: 2562 return emit_trunc_round( emit, insn, TRUE ); 2563 2564 case TGSI_OPCODE_CEIL: 2565 return emit_ceil( emit, insn ); 2566 2567 case TGSI_OPCODE_CMP: 2568 return emit_cmp( emit, insn ); 2569 2570 case TGSI_OPCODE_DIV: 2571 return emit_div( emit, insn ); 2572 2573 case TGSI_OPCODE_DP2: 2574 return emit_dp2( emit, insn ); 2575 2576 case TGSI_OPCODE_DPH: 2577 return emit_dph( emit, insn ); 2578 2579 case TGSI_OPCODE_NRM: 2580 return emit_nrm( emit, insn ); 2581 2582 case TGSI_OPCODE_COS: 2583 return emit_cos( emit, insn ); 2584 2585 case TGSI_OPCODE_SIN: 2586 return emit_sin( emit, insn ); 2587 2588 case TGSI_OPCODE_SCS: 2589 return emit_sincos( emit, insn ); 2590 2591 case TGSI_OPCODE_END: 2592 /* TGSI always finishes the main func with an END */ 2593 return emit_end( emit ); 2594 2595 case TGSI_OPCODE_KIL: 2596 return emit_kil( emit, insn ); 2597 2598 /* Selection opcodes. The underlying language is fairly 2599 * non-orthogonal about these. 
2600 */ 2601 case TGSI_OPCODE_SEQ: 2602 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn ); 2603 2604 case TGSI_OPCODE_SNE: 2605 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn ); 2606 2607 case TGSI_OPCODE_SGT: 2608 return emit_select_op( emit, PIPE_FUNC_GREATER, insn ); 2609 2610 case TGSI_OPCODE_SGE: 2611 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn ); 2612 2613 case TGSI_OPCODE_SLT: 2614 return emit_select_op( emit, PIPE_FUNC_LESS, insn ); 2615 2616 case TGSI_OPCODE_SLE: 2617 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn ); 2618 2619 case TGSI_OPCODE_SUB: 2620 return emit_sub( emit, insn ); 2621 2622 case TGSI_OPCODE_POW: 2623 return emit_pow( emit, insn ); 2624 2625 case TGSI_OPCODE_EX2: 2626 return emit_ex2( emit, insn ); 2627 2628 case TGSI_OPCODE_EXP: 2629 return emit_exp( emit, insn ); 2630 2631 case TGSI_OPCODE_LOG: 2632 return emit_log( emit, insn ); 2633 2634 case TGSI_OPCODE_LG2: 2635 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn ); 2636 2637 case TGSI_OPCODE_RSQ: 2638 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn ); 2639 2640 case TGSI_OPCODE_RCP: 2641 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn ); 2642 2643 case TGSI_OPCODE_CONT: 2644 case TGSI_OPCODE_RET: 2645 /* This is a noop -- we tell mesa that we can't support RET 2646 * within a function (early return), so this will always be 2647 * followed by an ENDSUB. 
2648 */ 2649 return TRUE; 2650 2651 /* These aren't actually used by any of the frontends we care 2652 * about: 2653 */ 2654 case TGSI_OPCODE_CLAMP: 2655 case TGSI_OPCODE_AND: 2656 case TGSI_OPCODE_OR: 2657 case TGSI_OPCODE_I2F: 2658 case TGSI_OPCODE_NOT: 2659 case TGSI_OPCODE_SHL: 2660 case TGSI_OPCODE_ISHR: 2661 case TGSI_OPCODE_XOR: 2662 return FALSE; 2663 2664 case TGSI_OPCODE_IF: 2665 return emit_if( emit, insn ); 2666 case TGSI_OPCODE_ELSE: 2667 return emit_else( emit, insn ); 2668 case TGSI_OPCODE_ENDIF: 2669 return emit_endif( emit, insn ); 2670 2671 case TGSI_OPCODE_BGNLOOP: 2672 return emit_bgnloop2( emit, insn ); 2673 case TGSI_OPCODE_ENDLOOP: 2674 return emit_endloop2( emit, insn ); 2675 case TGSI_OPCODE_BRK: 2676 return emit_brk( emit, insn ); 2677 2678 case TGSI_OPCODE_XPD: 2679 return emit_xpd( emit, insn ); 2680 2681 case TGSI_OPCODE_KILP: 2682 return emit_kilp( emit, insn ); 2683 2684 case TGSI_OPCODE_DST: 2685 return emit_dst_insn( emit, insn ); 2686 2687 case TGSI_OPCODE_LIT: 2688 return emit_lit( emit, insn ); 2689 2690 case TGSI_OPCODE_LRP: 2691 return emit_lrp( emit, insn ); 2692 2693 case TGSI_OPCODE_SSG: 2694 return emit_ssg( emit, insn ); 2695 2696 default: { 2697 unsigned opcode = translate_opcode(insn->Instruction.Opcode); 2698 2699 if (opcode == SVGA3DOP_LAST_INST) 2700 return FALSE; 2701 2702 if (!emit_simple_instruction( emit, opcode, insn )) 2703 return FALSE; 2704 } 2705 } 2706 2707 return TRUE; 2708 } 2709 2710 2711 static boolean svga_emit_immediate( struct svga_shader_emitter *emit, 2712 struct tgsi_full_immediate *imm) 2713 { 2714 static const float id[4] = {0,0,0,1}; 2715 float value[4]; 2716 unsigned i; 2717 2718 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5); 2719 for (i = 0; i < imm->Immediate.NrTokens - 1; i++) 2720 value[i] = imm->u[i].Float; 2721 2722 for ( ; i < 4; i++ ) 2723 value[i] = id[i]; 2724 2725 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 2726 emit->imm_start + 
emit->internal_imm_count++, 2727 value[0], value[1], value[2], value[3]); 2728 } 2729 2730 static boolean make_immediate( struct svga_shader_emitter *emit, 2731 float a, 2732 float b, 2733 float c, 2734 float d, 2735 struct src_register *out ) 2736 { 2737 unsigned idx = emit->nr_hw_float_const++; 2738 2739 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 2740 idx, a, b, c, d )) 2741 return FALSE; 2742 2743 *out = src_register( SVGA3DREG_CONST, idx ); 2744 2745 return TRUE; 2746 } 2747 2748 static boolean emit_vs_preamble( struct svga_shader_emitter *emit ) 2749 { 2750 if (!emit->key.vkey.need_prescale) { 2751 if (!make_immediate( emit, 0, 0, .5, .5, 2752 &emit->imm_0055)) 2753 return FALSE; 2754 } 2755 2756 return TRUE; 2757 } 2758 2759 static boolean emit_ps_preamble( struct svga_shader_emitter *emit ) 2760 { 2761 if (emit->ps_reads_pos && emit->info.reads_z) { 2762 /* 2763 * Assemble the position from various bits of inputs. Depth and W are 2764 * passed in a texcoord this is due to D3D's vPos not hold Z or W. 2765 * Also fixup the perspective interpolation. 
2766 * 2767 * temp_pos.xy = vPos.xy 2768 * temp_pos.w = rcp(texcoord1.w); 2769 * temp_pos.z = texcoord1.z * temp_pos.w; 2770 */ 2771 if (!submit_op1( emit, 2772 inst_token(SVGA3DOP_MOV), 2773 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ), 2774 emit->ps_true_pos )) 2775 return FALSE; 2776 2777 if (!submit_op1( emit, 2778 inst_token(SVGA3DOP_RCP), 2779 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ), 2780 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) )) 2781 return FALSE; 2782 2783 if (!submit_op2( emit, 2784 inst_token(SVGA3DOP_MUL), 2785 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ), 2786 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ), 2787 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) )) 2788 return FALSE; 2789 } 2790 2791 return TRUE; 2792 } 2793 2794 static boolean emit_ps_postamble( struct svga_shader_emitter *emit ) 2795 { 2796 unsigned i; 2797 2798 /* PS oDepth is incredibly fragile and it's very hard to catch the 2799 * types of usage that break it during shader emit. Easier just to 2800 * redirect the main program to a temporary and then only touch 2801 * oDepth with a hand-crafted MOV below. 2802 */ 2803 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) { 2804 2805 if (!submit_op1( emit, 2806 inst_token(SVGA3DOP_MOV), 2807 emit->true_pos, 2808 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) )) 2809 return FALSE; 2810 } 2811 2812 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { 2813 if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) { 2814 2815 /* Potentially override output colors with white for XOR 2816 * logicop workaround. 
2817 */ 2818 if (emit->unit == PIPE_SHADER_FRAGMENT && 2819 emit->key.fkey.white_fragments) { 2820 2821 struct src_register one = scalar( get_zero_immediate( emit ), 2822 TGSI_SWIZZLE_W ); 2823 2824 if (!submit_op1( emit, 2825 inst_token(SVGA3DOP_MOV), 2826 emit->true_col[i], 2827 one )) 2828 return FALSE; 2829 } 2830 else { 2831 if (!submit_op1( emit, 2832 inst_token(SVGA3DOP_MOV), 2833 emit->true_col[i], 2834 src(emit->temp_col[i]) )) 2835 return FALSE; 2836 } 2837 } 2838 } 2839 2840 return TRUE; 2841 } 2842 2843 static boolean emit_vs_postamble( struct svga_shader_emitter *emit ) 2844 { 2845 /* PSIZ output is incredibly fragile and it's very hard to catch 2846 * the types of usage that break it during shader emit. Easier 2847 * just to redirect the main program to a temporary and then only 2848 * touch PSIZ with a hand-crafted MOV below. 2849 */ 2850 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) { 2851 if (!submit_op1( emit, 2852 inst_token(SVGA3DOP_MOV), 2853 emit->true_psiz, 2854 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) )) 2855 return FALSE; 2856 } 2857 2858 /* Need to perform various manipulations on vertex position to cope 2859 * with the different GL and D3D clip spaces. 
2860 */ 2861 if (emit->key.vkey.need_prescale) { 2862 SVGA3dShaderDestToken temp_pos = emit->temp_pos; 2863 SVGA3dShaderDestToken depth = emit->depth_pos; 2864 SVGA3dShaderDestToken pos = emit->true_pos; 2865 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1; 2866 struct src_register prescale_scale = src_register( SVGA3DREG_CONST, 2867 offset + 0 ); 2868 struct src_register prescale_trans = src_register( SVGA3DREG_CONST, 2869 offset + 1 ); 2870 2871 if (!submit_op1( emit, 2872 inst_token(SVGA3DOP_MOV), 2873 writemask(depth, TGSI_WRITEMASK_W), 2874 scalar(src(temp_pos), TGSI_SWIZZLE_W) )) 2875 return FALSE; 2876 2877 /* MUL temp_pos.xyz, temp_pos, prescale.scale 2878 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos 2879 * --> Note that prescale.trans.w == 0 2880 */ 2881 if (!submit_op2( emit, 2882 inst_token(SVGA3DOP_MUL), 2883 writemask(temp_pos, TGSI_WRITEMASK_XYZ), 2884 src(temp_pos), 2885 prescale_scale )) 2886 return FALSE; 2887 2888 if (!submit_op3( emit, 2889 inst_token(SVGA3DOP_MAD), 2890 pos, 2891 swizzle(src(temp_pos), 3, 3, 3, 3), 2892 prescale_trans, 2893 src(temp_pos))) 2894 return FALSE; 2895 2896 /* Also write to depth value */ 2897 if (!submit_op3( emit, 2898 inst_token(SVGA3DOP_MAD), 2899 writemask(depth, TGSI_WRITEMASK_Z), 2900 swizzle(src(temp_pos), 3, 3, 3, 3), 2901 prescale_trans, 2902 src(temp_pos) )) 2903 return FALSE; 2904 } 2905 else { 2906 SVGA3dShaderDestToken temp_pos = emit->temp_pos; 2907 SVGA3dShaderDestToken depth = emit->depth_pos; 2908 SVGA3dShaderDestToken pos = emit->true_pos; 2909 struct src_register imm_0055 = emit->imm_0055; 2910 2911 /* Adjust GL clipping coordinate space to hardware (D3D-style): 2912 * 2913 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos 2914 * MOV result.position, temp_pos 2915 */ 2916 if (!submit_op2( emit, 2917 inst_token(SVGA3DOP_DP4), 2918 writemask(temp_pos, TGSI_WRITEMASK_Z), 2919 imm_0055, 2920 src(temp_pos) )) 2921 return FALSE; 2922 2923 if (!submit_op1( emit, 2924 
inst_token(SVGA3DOP_MOV), 2925 pos, 2926 src(temp_pos) )) 2927 return FALSE; 2928 2929 /* Move the manipulated depth into the extra texcoord reg */ 2930 if (!submit_op1( emit, 2931 inst_token(SVGA3DOP_MOV), 2932 writemask(depth, TGSI_WRITEMASK_ZW), 2933 src(temp_pos) )) 2934 return FALSE; 2935 } 2936 2937 return TRUE; 2938 } 2939 2940 /* 2941 0: IF VFACE :4 2942 1: COLOR = FrontColor; 2943 2: ELSE 2944 3: COLOR = BackColor; 2945 4: ENDIF 2946 */ 2947 static boolean emit_light_twoside( struct svga_shader_emitter *emit ) 2948 { 2949 struct src_register vface, zero; 2950 struct src_register front[2]; 2951 struct src_register back[2]; 2952 SVGA3dShaderDestToken color[2]; 2953 int count = emit->internal_color_count; 2954 int i; 2955 SVGA3dShaderInstToken if_token; 2956 2957 if (count == 0) 2958 return TRUE; 2959 2960 vface = get_vface( emit ); 2961 zero = get_zero_immediate( emit ); 2962 2963 /* Can't use get_temp() to allocate the color reg as such 2964 * temporaries will be reclaimed after each instruction by the call 2965 * to reset_temp_regs(). 
2966 */ 2967 for (i = 0; i < count; i++) { 2968 color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); 2969 front[i] = emit->input_map[emit->internal_color_idx[i]]; 2970 2971 /* Back is always the next input: 2972 */ 2973 back[i] = front[i]; 2974 back[i].base.num = front[i].base.num + 1; 2975 2976 /* Reassign the input_map to the actual front-face color: 2977 */ 2978 emit->input_map[emit->internal_color_idx[i]] = src(color[i]); 2979 } 2980 2981 if_token = inst_token( SVGA3DOP_IFC ); 2982 2983 if (emit->key.fkey.front_ccw) 2984 if_token.control = SVGA3DOPCOMP_LT; 2985 else 2986 if_token.control = SVGA3DOPCOMP_GT; 2987 2988 zero = scalar(zero, TGSI_SWIZZLE_X); 2989 2990 if (!(emit_instruction( emit, if_token ) && 2991 emit_src( emit, vface ) && 2992 emit_src( emit, zero ) )) 2993 return FALSE; 2994 2995 for (i = 0; i < count; i++) { 2996 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] )) 2997 return FALSE; 2998 } 2999 3000 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE)))) 3001 return FALSE; 3002 3003 for (i = 0; i < count; i++) { 3004 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] )) 3005 return FALSE; 3006 } 3007 3008 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) )) 3009 return FALSE; 3010 3011 return TRUE; 3012 } 3013 3014 /* 3015 0: SETP_GT TEMP, VFACE, 0 3016 where TEMP is a fake frontface register 3017 */ 3018 static boolean emit_frontface( struct svga_shader_emitter *emit ) 3019 { 3020 struct src_register vface, zero; 3021 SVGA3dShaderDestToken temp; 3022 struct src_register pass, fail; 3023 3024 vface = get_vface( emit ); 3025 zero = get_zero_immediate( emit ); 3026 3027 /* Can't use get_temp() to allocate the fake frontface reg as such 3028 * temporaries will be reclaimed after each instruction by the call 3029 * to reset_temp_regs(). 
3030 */ 3031 temp = dst_register( SVGA3DREG_TEMP, 3032 emit->nr_hw_temp++ ); 3033 3034 if (emit->key.fkey.front_ccw) { 3035 pass = scalar( zero, TGSI_SWIZZLE_X ); 3036 fail = scalar( zero, TGSI_SWIZZLE_W ); 3037 } else { 3038 pass = scalar( zero, TGSI_SWIZZLE_W ); 3039 fail = scalar( zero, TGSI_SWIZZLE_X ); 3040 } 3041 3042 if (!emit_conditional(emit, PIPE_FUNC_GREATER, 3043 temp, vface, scalar( zero, TGSI_SWIZZLE_X ), 3044 pass, fail)) 3045 return FALSE; 3046 3047 /* Reassign the input_map to the actual front-face color: 3048 */ 3049 emit->input_map[emit->internal_frontface_idx] = src(temp); 3050 3051 return TRUE; 3052 } 3053 3054 3055 /** 3056 * Emit code to invert the T component of the incoming texture coordinate. 3057 * This is used for drawing point sprites when 3058 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT. 3059 */ 3060 static boolean emit_inverted_texcoords( struct svga_shader_emitter *emit ) 3061 { 3062 struct src_register zero = get_zero_immediate(emit); 3063 struct src_register pos_neg_one = get_pos_neg_one_immediate( emit ); 3064 unsigned inverted_texcoords = emit->inverted_texcoords; 3065 3066 while (inverted_texcoords) { 3067 const unsigned unit = ffs(inverted_texcoords) - 1; 3068 3069 assert(emit->inverted_texcoords & (1 << unit)); 3070 3071 assert(unit < Elements(emit->ps_true_texcoord)); 3072 3073 assert(unit < Elements(emit->ps_inverted_texcoord_input)); 3074 3075 assert(emit->ps_inverted_texcoord_input[unit] 3076 < Elements(emit->input_map)); 3077 3078 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */ 3079 if (!submit_op3(emit, 3080 inst_token(SVGA3DOP_MAD), 3081 dst(emit->ps_inverted_texcoord[unit]), 3082 emit->ps_true_texcoord[unit], 3083 swizzle(pos_neg_one, 0, 3, 0, 0), /* (1, -1, 1, 1) */ 3084 swizzle(zero, 0, 3, 0, 0))) /* (0, 1, 0, 0) */ 3085 return FALSE; 3086 3087 /* Reassign the input_map entry to the new texcoord register */ 3088 emit->input_map[emit->ps_inverted_texcoord_input[unit]] = 3089 
emit->ps_inverted_texcoord[unit]; 3090 3091 inverted_texcoords &= ~(1 << unit); 3092 } 3093 3094 return TRUE; 3095 } 3096 3097 3098 static INLINE boolean 3099 needs_to_create_zero( struct svga_shader_emitter *emit ) 3100 { 3101 int i; 3102 3103 if (emit->unit == PIPE_SHADER_FRAGMENT) { 3104 if (emit->key.fkey.light_twoside) 3105 return TRUE; 3106 3107 if (emit->key.fkey.white_fragments) 3108 return TRUE; 3109 3110 if (emit->emit_frontface) 3111 return TRUE; 3112 3113 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 || 3114 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 || 3115 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1) 3116 return TRUE; 3117 3118 if (emit->inverted_texcoords) 3119 return TRUE; 3120 3121 /* look for any PIPE_SWIZZLE_ZERO/ONE terms */ 3122 for (i = 0; i < emit->key.fkey.num_textures; i++) { 3123 if (emit->key.fkey.tex[i].swizzle_r > PIPE_SWIZZLE_ALPHA || 3124 emit->key.fkey.tex[i].swizzle_g > PIPE_SWIZZLE_ALPHA || 3125 emit->key.fkey.tex[i].swizzle_b > PIPE_SWIZZLE_ALPHA || 3126 emit->key.fkey.tex[i].swizzle_a > PIPE_SWIZZLE_ALPHA) 3127 return TRUE; 3128 } 3129 3130 for (i = 0; i < emit->key.fkey.num_textures; i++) { 3131 if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) 3132 return TRUE; 3133 } 3134 } 3135 3136 if (emit->unit == PIPE_SHADER_VERTEX) { 3137 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) 3138 return TRUE; 3139 } 3140 3141 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || 3142 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || 3143 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || 3144 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || 3145 emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 || 3146 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || 3147 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || 3148 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || 3149 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 || 3150 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 || 3151 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 
|| 3152 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 || 3153 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 || 3154 emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 || 3155 emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1) 3156 return TRUE; 3157 3158 return FALSE; 3159 } 3160 3161 static INLINE boolean 3162 needs_to_create_loop_const( struct svga_shader_emitter *emit ) 3163 { 3164 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1); 3165 } 3166 3167 static INLINE boolean 3168 needs_to_create_arl_consts( struct svga_shader_emitter *emit ) 3169 { 3170 return (emit->num_arl_consts > 0); 3171 } 3172 3173 static INLINE boolean 3174 pre_parse_add_indirect( struct svga_shader_emitter *emit, 3175 int num, int current_arl) 3176 { 3177 int i; 3178 assert(num < 0); 3179 3180 for (i = 0; i < emit->num_arl_consts; ++i) { 3181 if (emit->arl_consts[i].arl_num == current_arl) 3182 break; 3183 } 3184 /* new entry */ 3185 if (emit->num_arl_consts == i) { 3186 ++emit->num_arl_consts; 3187 } 3188 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ? 
3189 num : 3190 emit->arl_consts[i].number; 3191 emit->arl_consts[i].arl_num = current_arl; 3192 return TRUE; 3193 } 3194 3195 static boolean 3196 pre_parse_instruction( struct svga_shader_emitter *emit, 3197 const struct tgsi_full_instruction *insn, 3198 int current_arl) 3199 { 3200 if (insn->Src[0].Register.Indirect && 3201 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) { 3202 const struct tgsi_full_src_register *reg = &insn->Src[0]; 3203 if (reg->Register.Index < 0) { 3204 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3205 } 3206 } 3207 3208 if (insn->Src[1].Register.Indirect && 3209 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) { 3210 const struct tgsi_full_src_register *reg = &insn->Src[1]; 3211 if (reg->Register.Index < 0) { 3212 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3213 } 3214 } 3215 3216 if (insn->Src[2].Register.Indirect && 3217 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) { 3218 const struct tgsi_full_src_register *reg = &insn->Src[2]; 3219 if (reg->Register.Index < 0) { 3220 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3221 } 3222 } 3223 3224 return TRUE; 3225 } 3226 3227 static boolean 3228 pre_parse_tokens( struct svga_shader_emitter *emit, 3229 const struct tgsi_token *tokens ) 3230 { 3231 struct tgsi_parse_context parse; 3232 int current_arl = 0; 3233 3234 tgsi_parse_init( &parse, tokens ); 3235 3236 while (!tgsi_parse_end_of_tokens( &parse )) { 3237 tgsi_parse_token( &parse ); 3238 switch (parse.FullToken.Token.Type) { 3239 case TGSI_TOKEN_TYPE_IMMEDIATE: 3240 case TGSI_TOKEN_TYPE_DECLARATION: 3241 break; 3242 case TGSI_TOKEN_TYPE_INSTRUCTION: 3243 if (parse.FullToken.FullInstruction.Instruction.Opcode == 3244 TGSI_OPCODE_ARL) { 3245 ++current_arl; 3246 } 3247 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction, 3248 current_arl )) 3249 return FALSE; 3250 break; 3251 default: 3252 break; 3253 } 3254 3255 } 3256 return TRUE; 3257 } 3258 3259 static boolean 
svga_shader_emit_helpers( struct svga_shader_emitter *emit ) 3260 3261 { 3262 if (needs_to_create_zero( emit )) { 3263 create_zero_immediate( emit ); 3264 } 3265 if (needs_to_create_loop_const( emit )) { 3266 create_loop_const( emit ); 3267 } 3268 if (needs_to_create_arl_consts( emit )) { 3269 create_arl_consts( emit ); 3270 } 3271 3272 if (emit->unit == PIPE_SHADER_FRAGMENT) { 3273 if (!emit_ps_preamble( emit )) 3274 return FALSE; 3275 3276 if (emit->key.fkey.light_twoside) { 3277 if (!emit_light_twoside( emit )) 3278 return FALSE; 3279 } 3280 if (emit->emit_frontface) { 3281 if (!emit_frontface( emit )) 3282 return FALSE; 3283 } 3284 if (emit->inverted_texcoords) { 3285 if (!emit_inverted_texcoords( emit )) 3286 return FALSE; 3287 } 3288 } 3289 3290 return TRUE; 3291 } 3292 3293 boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit, 3294 const struct tgsi_token *tokens ) 3295 { 3296 struct tgsi_parse_context parse; 3297 boolean ret = TRUE; 3298 boolean helpers_emitted = FALSE; 3299 unsigned line_nr = 0; 3300 3301 tgsi_parse_init( &parse, tokens ); 3302 emit->internal_imm_count = 0; 3303 3304 if (emit->unit == PIPE_SHADER_VERTEX) { 3305 ret = emit_vs_preamble( emit ); 3306 if (!ret) 3307 goto done; 3308 } 3309 3310 pre_parse_tokens(emit, tokens); 3311 3312 while (!tgsi_parse_end_of_tokens( &parse )) { 3313 tgsi_parse_token( &parse ); 3314 3315 switch (parse.FullToken.Token.Type) { 3316 case TGSI_TOKEN_TYPE_IMMEDIATE: 3317 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate ); 3318 if (!ret) 3319 goto done; 3320 break; 3321 3322 case TGSI_TOKEN_TYPE_DECLARATION: 3323 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration ); 3324 if (!ret) 3325 goto done; 3326 break; 3327 3328 case TGSI_TOKEN_TYPE_INSTRUCTION: 3329 if (!helpers_emitted) { 3330 if (!svga_shader_emit_helpers( emit )) 3331 goto done; 3332 helpers_emitted = TRUE; 3333 } 3334 ret = svga_emit_instruction( emit, 3335 line_nr++, 3336 
&parse.FullToken.FullInstruction ); 3337 if (!ret) 3338 goto done; 3339 break; 3340 default: 3341 break; 3342 } 3343 3344 reset_temp_regs( emit ); 3345 } 3346 3347 /* Need to terminate the current subroutine. Note that the 3348 * hardware doesn't tolerate shaders without sub-routines 3349 * terminating with RET+END. 3350 */ 3351 if (!emit->in_main_func) { 3352 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) ); 3353 if (!ret) 3354 goto done; 3355 } 3356 3357 assert(emit->dynamic_branching_level == 0); 3358 3359 /* Need to terminate the whole shader: 3360 */ 3361 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) ); 3362 if (!ret) 3363 goto done; 3364 3365 done: 3366 tgsi_parse_free( &parse ); 3367 return ret; 3368 } 3369