1 /********************************************************** 2 * Copyright 1998-2013 VMware, Inc. All rights reserved. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be 13 * included in all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 * 24 **********************************************************/ 25 26 /** 27 * @file svga_tgsi_vgpu10.c 28 * 29 * TGSI -> VGPU10 shader translation. 
 *
 * \author Mingcheng Chen
 * \author Brian Paul
 */

#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_defines.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_two_side.h"
#include "tgsi/tgsi_aa_point.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_bitmask.h"
#include "util/u_debug.h"
#include "util/u_pstipple.h"

#include "svga_context.h"
#include "svga_debug.h"
#include "svga_link.h"
#include "svga_shader.h"
#include "svga_tgsi.h"

#include "VGPU10ShaderTokens.h"


#define INVALID_INDEX 99999
#define MAX_INTERNAL_TEMPS 3
#define MAX_SYSTEM_VALUES 4
#define MAX_IMMEDIATE_COUNT \
   (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
#define MAX_TEMP_ARRAYS 64  /* Enough? */


/**
 * Clipping is complicated.  There's four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   CLIP_NONE,     /**< No clipping enabled */
   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
                   * one or more user-defined clip planes are enabled.  We
                   * generate extra code to emit clip distances.
                   */
   CLIP_DISTANCE, /**< The shader already declares clip distance output
                   * registers and has code to write to them.
                   */
   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
                   * has code that writes to the register.  We convert the
                   * clipvertex position into one or more clip distances.
                   */
};


/**
 * Context/state for the TGSI -> VGPU10 translation.  Holds the growable
 * token output buffer plus all bookkeeping accumulated during translation.
 */
struct svga_shader_emitter_v10
{
   /* The token output buffer.  ptr is the write cursor inside buf. */
   unsigned size;
   char *buf;
   char *ptr;

   /* Information about the shader and state (does not change) */
   struct svga_compile_key key;
   struct tgsi_shader_info info;
   unsigned unit;

   unsigned inst_start_token;
   boolean discard_instruction; /**< throw away current instruction? */

   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
   unsigned num_immediates;      /**< Number of immediates emitted */
   unsigned common_immediate_pos[8];  /**< literals for common immediates */
   unsigned num_common_immediates;
   boolean immediates_emitted;

   unsigned num_outputs;      /**< include any extra outputs */
                              /**  The first extra output is reserved for
                               *   non-adjusted vertex position for
                               *   stream output purpose
                               */

   /* Temporary Registers */
   unsigned num_shader_temps; /**< num of temps used by original shader */
   unsigned internal_temp_count;  /**< currently allocated internal temps */
   struct {
      unsigned start, size;
   } temp_arrays[MAX_TEMP_ARRAYS];
   unsigned num_temp_arrays;

   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
   struct {
      unsigned arrayId, index;
   } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */

   /** Number of constants used by original shader for each constant buffer.
    * The size should probably always match with that of svga_state.constbufs.
    */
   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];

   /* Samplers */
   unsigned num_samplers;
   boolean sampler_view[PIPE_MAX_SAMPLERS];  /**< True if sampler view exists*/
   ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
   ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */

   /* Address regs (really implemented with temps) */
   unsigned num_address_regs;
   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];

   /* Output register usage masks */
   ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];

   /* To map TGSI system value index to VGPU shader input indexes */
   ubyte system_value_indexes[MAX_SYSTEM_VALUES];

   struct {
      /* vertex position scale/translation */
      unsigned out_index;  /**< the real position output reg */
      unsigned tmp_index;  /**< the fake/temp position output reg */
      unsigned so_index;   /**< the non-adjusted position output reg */
      unsigned prescale_scale_index, prescale_trans_index;
      boolean need_prescale;
   } vposition;

   /* For vertex shaders only */
   struct {
      /* viewport constant */
      unsigned viewport_index;

      /* temp index of adjusted vertex attributes */
      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
   } vs;

   /* For fragment shaders only */
   struct {
      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
      unsigned num_color_outputs;
      unsigned color_tmp_index;  /**< fake/temp color output reg */
      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */

      /* front-face */
      unsigned face_input_index; /**< real fragment shader face reg (bool) */
      unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */

      unsigned pstipple_sampler_unit;

      unsigned fragcoord_input_index;  /**< real fragment position input reg */
      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */

      /** Which texture units are doing shadow comparison in the FS code */
      unsigned shadow_compare_units;
   } fs;

   /* For geometry shaders only */
   struct {
      VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */
      VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
      unsigned input_size;          /**< size of input arrays */
      unsigned prim_id_index;       /**< primitive id register index */
      unsigned max_out_vertices;    /**< maximum number of output vertices */
   } gs;

   /* For vertex or geometry shaders */
   enum clipping_mode clip_mode;
   unsigned clip_dist_out_index; /**< clip distance output register index */
   unsigned clip_dist_tmp_index; /**< clip distance temporary register */
   unsigned clip_dist_so_index;  /**< clip distance shadow copy */

   /** Index of temporary holding the clipvertex coordinate */
   unsigned clip_vertex_out_index; /**< clip vertex output register index */
   unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */

   /* user clip plane constant slot indexes */
   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];

   unsigned num_output_writes;
   boolean constant_color_output;

   boolean uses_flat_interp;

   /* For all shaders: const reg index for RECT coord scaling */
   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];

   /* For all shaders: const reg index for texture buffer size */
   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];

   /* VS/GS/FS Linkage info */
   struct shader_linkage linkage;

   bool register_overflow;   /**< Set if we exceed a VGPU10 register limit */
};


static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit);

static boolean
emit_vertex(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst);

/** Static fallback buffer used after an allocation failure (see expand()) */
static char err_buf[128];

/**
 * Grow the emitter's output buffer by doubling its size.
 * On allocation failure the buffer is redirected at the static err_buf
 * so later emits still have somewhere to write, and FALSE is returned.
 */
static boolean
expand(struct svga_shader_emitter_v10 *emit)
{
   char *new_buf;
   unsigned newsize = emit->size * 2;

   if (emit->buf != err_buf)
      new_buf = REALLOC(emit->buf, emit->size, newsize);
   else
      new_buf = NULL;

   if (!new_buf) {
      emit->ptr = err_buf;
      emit->buf = err_buf;
      emit->size = sizeof(err_buf);
      return FALSE;
   }

   emit->size = newsize;
   /* keep the write cursor at the same offset in the new buffer */
   emit->ptr = new_buf + (emit->ptr - emit->buf);
   emit->buf = new_buf;
   return TRUE;
}

/**
 * Create and initialize a new svga_shader_emitter_v10 object.
 */
static struct svga_shader_emitter_v10 *
alloc_emitter(void)
{
   struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));

   if (!emit)
      return NULL;

   /* to initialize the output buffer */
   emit->size = 512;
   if (!expand(emit)) {
      FREE(emit);
      return NULL;
   }
   return emit;
}

/**
 * Free an svga_shader_emitter_v10 object.
 */
static void
free_emitter(struct svga_shader_emitter_v10 *emit)
{
   assert(emit);
   FREE(emit->buf);    /* will be NULL if translation succeeded */
   FREE(emit);
}

/**
 * Ensure there's room for nr_dwords more dwords in the output buffer,
 * growing it as needed.  Returns FALSE if the buffer could not be grown.
 */
static inline boolean
reserve(struct svga_shader_emitter_v10 *emit,
        unsigned nr_dwords)
{
   while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
      if (!expand(emit))
         return FALSE;
   }

   return TRUE;
}

/** Append a single dword to the output buffer */
static boolean
emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
{
   if (!reserve(emit, 1))
      return FALSE;

   *(uint32 *)emit->ptr = dword;
   emit->ptr += sizeof dword;
   return TRUE;
}

/** Append an array of dwords to the output buffer */
static boolean
emit_dwords(struct svga_shader_emitter_v10 *emit,
            const uint32 *dwords,
            unsigned nr)
{
   if (!reserve(emit, nr))
      return FALSE;

   memcpy(emit->ptr, dwords, nr * sizeof *dwords);
   emit->ptr += nr * sizeof *dwords;
   return TRUE;
}

/** Return the number of tokens in the emitter's buffer */
static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
{
   return (emit->ptr - emit->buf) / sizeof(unsigned);
}


/**
 * Check for register overflow.  If we overflow we'll set an
 * error flag.  This function can be called for register declarations
 * or use as src/dst instruction operands.
 * \param operandType  register type.  One of VGPU10_OPERAND_TYPE_x
                       or VGPU10_OPCODE_DCL_x
 * \param index  the register index
 */
static void
check_register_index(struct svga_shader_emitter_v10 *emit,
                     unsigned operandType, unsigned index)
{
   bool overflow_before = emit->register_overflow;

   switch (operandType) {
   case VGPU10_OPERAND_TYPE_TEMP:
   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
   case VGPU10_OPCODE_DCL_TEMPS:
      if (index >= VGPU10_MAX_TEMPS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_INPUT:
   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
   case VGPU10_OPCODE_DCL_INPUT:
   case VGPU10_OPCODE_DCL_INPUT_SGV:
   case VGPU10_OPCODE_DCL_INPUT_SIV:
   case VGPU10_OPCODE_DCL_INPUT_PS:
   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
      /* input limits depend on which shader stage we're translating */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_INPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_INPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_INPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_OUTPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_SAMPLER:
   case VGPU10_OPCODE_DCL_SAMPLER:
      if (index >= VGPU10_MAX_SAMPLERS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_RESOURCE:
   case VGPU10_OPCODE_DCL_RESOURCE:
      if (index >= VGPU10_MAX_RESOURCES) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
      if (index >= MAX_IMMEDIATE_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   default:
      assert(0);
      ; /* nothing */
   }

   /* only report the first time we cross into overflow */
   if (emit->register_overflow && !overflow_before) {
      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
                   operandType, index);
   }
}


/**
 * Examine misc state to determine the clipping mode.
 */
static void
determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
{
   if (emit->info.num_written_clipdistance > 0) {
      emit->clip_mode = CLIP_DISTANCE;
   }
   else if (emit->info.writes_clipvertex) {
      emit->clip_mode = CLIP_VERTEX;
   }
   else if (emit->key.clip_plane_enable) {
      emit->clip_mode = CLIP_LEGACY;
   }
   else {
      emit->clip_mode = CLIP_NONE;
   }
}


/**
 * For clip distance register declarations and clip distance register
 * writes we need to mask the declaration usage or instruction writemask
 * (respectively) against the set of the really-enabled clipping planes.
 *
 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
 * has a VS that writes to all 8 clip distance registers, but the plane enable
 * flags are a subset of that.
 *
 * This function is used to apply the plane enable flags to the register
 * declaration or instruction writemask.
 *
 * \param writemask  the declaration usage mask or instruction writemask
 * \param clip_reg_index  which clip plane register is being declared/written.
 *                        The legal values are 0 and 1 (two clip planes per
 *                        register, for a total of 8 clip planes)
 */
static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
                      unsigned writemask, unsigned clip_reg_index)
{
   unsigned shift;

   assert(clip_reg_index < 2);

   /* four clip planes per clip register: */
   shift = clip_reg_index * 4;
   writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);

   return writemask;
}


/**
 * Translate gallium shader type into VGPU10 type.
 */
static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)
{
   switch (type) {
   case PIPE_SHADER_VERTEX:
      return VGPU10_VERTEX_SHADER;
   case PIPE_SHADER_GEOMETRY:
      return VGPU10_GEOMETRY_SHADER;
   case PIPE_SHADER_FRAGMENT:
      return VGPU10_PIXEL_SHADER;
   default:
      assert(!"Unexpected shader type");
      return VGPU10_VERTEX_SHADER;
   }
}


/**
 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x
 * Note: we only need to translate the opcodes for "simple" instructions,
 * as seen below.  All other opcodes are handled/translated specially.
 */
static VGPU10_OPCODE_TYPE
translate_opcode(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
      return VGPU10_OPCODE_MOV;
   case TGSI_OPCODE_MUL:
      return VGPU10_OPCODE_MUL;
   case TGSI_OPCODE_ADD:
      return VGPU10_OPCODE_ADD;
   case TGSI_OPCODE_DP3:
      return VGPU10_OPCODE_DP3;
   case TGSI_OPCODE_DP4:
      return VGPU10_OPCODE_DP4;
   case TGSI_OPCODE_MIN:
      return VGPU10_OPCODE_MIN;
   case TGSI_OPCODE_MAX:
      return VGPU10_OPCODE_MAX;
   case TGSI_OPCODE_MAD:
      return VGPU10_OPCODE_MAD;
   case TGSI_OPCODE_SQRT:
      return VGPU10_OPCODE_SQRT;
   case TGSI_OPCODE_FRC:
      return VGPU10_OPCODE_FRC;
   case TGSI_OPCODE_FLR:
      return VGPU10_OPCODE_ROUND_NI;  /* floor: round toward -infinity */
   case TGSI_OPCODE_FSEQ:
      return VGPU10_OPCODE_EQ;
   case TGSI_OPCODE_FSGE:
      return VGPU10_OPCODE_GE;
   case TGSI_OPCODE_FSNE:
      return VGPU10_OPCODE_NE;
   case TGSI_OPCODE_DDX:
      return VGPU10_OPCODE_DERIV_RTX;
   case TGSI_OPCODE_DDY:
      return VGPU10_OPCODE_DERIV_RTY;
   case TGSI_OPCODE_RET:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_DIV:
      return VGPU10_OPCODE_DIV;
   case TGSI_OPCODE_IDIV:
      return VGPU10_OPCODE_IDIV;
   case TGSI_OPCODE_DP2:
      return VGPU10_OPCODE_DP2;
   case TGSI_OPCODE_BRK:
      return VGPU10_OPCODE_BREAK;
   case TGSI_OPCODE_IF:
      return VGPU10_OPCODE_IF;
   case TGSI_OPCODE_ELSE:
      return VGPU10_OPCODE_ELSE;
   case TGSI_OPCODE_ENDIF:
      return VGPU10_OPCODE_ENDIF;
   case TGSI_OPCODE_CEIL:
      return VGPU10_OPCODE_ROUND_PI;  /* ceil: round toward +infinity */
   case TGSI_OPCODE_I2F:
      return VGPU10_OPCODE_ITOF;
   case TGSI_OPCODE_NOT:
      return VGPU10_OPCODE_NOT;
   case TGSI_OPCODE_TRUNC:
      return VGPU10_OPCODE_ROUND_Z;   /* trunc: round toward zero */
   case TGSI_OPCODE_SHL:
      return VGPU10_OPCODE_ISHL;
   case TGSI_OPCODE_AND:
      return VGPU10_OPCODE_AND;
   case TGSI_OPCODE_OR:
      return VGPU10_OPCODE_OR;
   case TGSI_OPCODE_XOR:
      return VGPU10_OPCODE_XOR;
   case TGSI_OPCODE_CONT:
      return VGPU10_OPCODE_CONTINUE;
   case TGSI_OPCODE_EMIT:
      return VGPU10_OPCODE_EMIT;
   case TGSI_OPCODE_ENDPRIM:
      return VGPU10_OPCODE_CUT;
   case TGSI_OPCODE_BGNLOOP:
      return VGPU10_OPCODE_LOOP;
   case TGSI_OPCODE_ENDLOOP:
      return VGPU10_OPCODE_ENDLOOP;
   case TGSI_OPCODE_ENDSUB:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_NOP:
      return VGPU10_OPCODE_NOP;
   case TGSI_OPCODE_END:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_F2I:
      return VGPU10_OPCODE_FTOI;
   case TGSI_OPCODE_IMAX:
      return VGPU10_OPCODE_IMAX;
   case TGSI_OPCODE_IMIN:
      return VGPU10_OPCODE_IMIN;
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      /* NOTE(review): all three map to UDIV -- presumably the quotient or
       * remainder result is selected where operands are emitted; confirm
       * at the instruction-emission code.
       */
      return VGPU10_OPCODE_UDIV;
   case TGSI_OPCODE_IMUL_HI:
      return VGPU10_OPCODE_IMUL;
   case TGSI_OPCODE_INEG:
      return VGPU10_OPCODE_INEG;
   case TGSI_OPCODE_ISHR:
      return VGPU10_OPCODE_ISHR;
   case TGSI_OPCODE_ISGE:
      return VGPU10_OPCODE_IGE;
   case TGSI_OPCODE_ISLT:
      return VGPU10_OPCODE_ILT;
   case TGSI_OPCODE_F2U:
      return VGPU10_OPCODE_FTOU;
   case TGSI_OPCODE_UADD:
      return VGPU10_OPCODE_IADD;
   case TGSI_OPCODE_U2F:
      return VGPU10_OPCODE_UTOF;
   case TGSI_OPCODE_UCMP:
      return VGPU10_OPCODE_MOVC;
   case TGSI_OPCODE_UMAD:
      return VGPU10_OPCODE_UMAD;
   case TGSI_OPCODE_UMAX:
      return VGPU10_OPCODE_UMAX;
   case TGSI_OPCODE_UMIN:
      return VGPU10_OPCODE_UMIN;
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMUL_HI:
      return VGPU10_OPCODE_UMUL;
   case TGSI_OPCODE_USEQ:
      return VGPU10_OPCODE_IEQ;
   case TGSI_OPCODE_USGE:
      return VGPU10_OPCODE_UGE;
   case TGSI_OPCODE_USHR:
      return VGPU10_OPCODE_USHR;
   case TGSI_OPCODE_USLT:
      return VGPU10_OPCODE_ULT;
   case TGSI_OPCODE_USNE:
      return VGPU10_OPCODE_INE;
   case TGSI_OPCODE_SWITCH:
      return VGPU10_OPCODE_SWITCH;
   case TGSI_OPCODE_CASE:
      return VGPU10_OPCODE_CASE;
   case TGSI_OPCODE_DEFAULT:
      return VGPU10_OPCODE_DEFAULT;
   case TGSI_OPCODE_ENDSWITCH:
      return VGPU10_OPCODE_ENDSWITCH;
   case TGSI_OPCODE_FSLT:
      return VGPU10_OPCODE_LT;
   case TGSI_OPCODE_ROUND:
      return VGPU10_OPCODE_ROUND_NE;  /* round to nearest even */
   default:
      assert(!"Unexpected TGSI opcode in translate_opcode()");
      return VGPU10_OPCODE_NOP;
   }
}


/**
 * Translate a TGSI register file type into a VGPU10 operand type.
 * \param array  is the TGSI_FILE_TEMPORARY register an array?
 */
static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file, boolean array)
{
   switch (file) {
   case TGSI_FILE_CONSTANT:
      return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   case TGSI_FILE_INPUT:
      return VGPU10_OPERAND_TYPE_INPUT;
   case TGSI_FILE_OUTPUT:
      return VGPU10_OPERAND_TYPE_OUTPUT;
   case TGSI_FILE_TEMPORARY:
      return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
                   : VGPU10_OPERAND_TYPE_TEMP;
   case TGSI_FILE_IMMEDIATE:
      /* all immediates are 32-bit values at this time so
       * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time.
       */
      return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
   case TGSI_FILE_SAMPLER:
      return VGPU10_OPERAND_TYPE_SAMPLER;
   case TGSI_FILE_SYSTEM_VALUE:
      return VGPU10_OPERAND_TYPE_INPUT;

   /* XXX TODO more cases to finish */

   default:
      assert(!"Bad tgsi register file!");
      return VGPU10_OPERAND_TYPE_NULL;
   }
}


/**
 * Emit a null dst register
 */
static void
emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand;

   operand.value = 0;
   operand.operandType = VGPU10_OPERAND_TYPE_NULL;
   operand.numComponents = VGPU10_OPERAND_0_COMPONENT;

   emit_dword(emit, operand.value);
}


/**
 * If the given register is a temporary, return the array ID.
 * Else return zero.
708 */ 709 static unsigned 710 get_temp_array_id(const struct svga_shader_emitter_v10 *emit, 711 enum tgsi_file_type file, unsigned index) 712 { 713 if (file == TGSI_FILE_TEMPORARY) { 714 return emit->temp_map[index].arrayId; 715 } 716 else { 717 return 0; 718 } 719 } 720 721 722 /** 723 * If the given register is a temporary, convert the index from a TGSI 724 * TEMPORARY index to a VGPU10 temp index. 725 */ 726 static unsigned 727 remap_temp_index(const struct svga_shader_emitter_v10 *emit, 728 enum tgsi_file_type file, unsigned index) 729 { 730 if (file == TGSI_FILE_TEMPORARY) { 731 return emit->temp_map[index].index; 732 } 733 else { 734 return index; 735 } 736 } 737 738 739 /** 740 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). 741 * Note: the operandType field must already be initialized. 742 */ 743 static VGPU10OperandToken0 744 setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, 745 VGPU10OperandToken0 operand0, 746 enum tgsi_file_type file, 747 boolean indirect, boolean index2D, 748 unsigned tempArrayID) 749 { 750 unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 751 752 /* 753 * Compute index dimensions 754 */ 755 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || 756 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { 757 /* there's no swizzle for in-line immediates */ 758 indexDim = VGPU10_OPERAND_INDEX_0D; 759 assert(operand0.selectionMode == 0); 760 } 761 else { 762 if (index2D || 763 tempArrayID > 0 || 764 operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) { 765 indexDim = VGPU10_OPERAND_INDEX_2D; 766 } 767 else { 768 indexDim = VGPU10_OPERAND_INDEX_1D; 769 } 770 } 771 772 /* 773 * Compute index representations (immediate, relative, etc). 
774 */ 775 if (tempArrayID > 0) { 776 assert(file == TGSI_FILE_TEMPORARY); 777 /* First index is the array ID, second index is the array element */ 778 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 779 if (indirect) { 780 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 781 } 782 else { 783 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 784 } 785 } 786 else if (indirect) { 787 if (file == TGSI_FILE_CONSTANT) { 788 /* index[0] indicates which constant buffer while index[1] indicates 789 * the position in the constant buffer. 790 */ 791 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 792 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 793 } 794 else { 795 /* All other register files are 1-dimensional */ 796 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE; 797 } 798 } 799 else { 800 index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 801 index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32; 802 } 803 804 operand0.indexDimension = indexDim; 805 operand0.index0Representation = index0Rep; 806 operand0.index1Representation = index1Rep; 807 808 return operand0; 809 } 810 811 812 /** 813 * Emit the operand for expressing an address register for indirect indexing. 814 * Note that the address register is really just a temp register. 
815 * \param addr_reg_index which address register to use 816 */ 817 static void 818 emit_indirect_register(struct svga_shader_emitter_v10 *emit, 819 unsigned addr_reg_index) 820 { 821 unsigned tmp_reg_index; 822 VGPU10OperandToken0 operand0; 823 824 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); 825 826 tmp_reg_index = emit->address_reg_index[addr_reg_index]; 827 828 /* operand0 is a simple temporary register, selecting one component */ 829 operand0.value = 0; 830 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; 831 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 832 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 833 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 834 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 835 operand0.swizzleX = 0; 836 operand0.swizzleY = 1; 837 operand0.swizzleZ = 2; 838 operand0.swizzleW = 3; 839 840 emit_dword(emit, operand0.value); 841 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); 842 } 843 844 845 /** 846 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. 
 * \param emit  the emitter context
 * \param reg  the TGSI dst register to translate
 */
static void
emit_dst_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_dst_register *reg)
{
   enum tgsi_file_type file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index];
   const unsigned sem_index = emit->info.output_semantic_index[index];
   unsigned writemask = reg->Register.WriteMask;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   VGPU10OperandToken0 operand0;

   if (file == TGSI_FILE_OUTPUT) {
      if (emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY) {
         if (index == emit->vposition.out_index &&
             emit->vposition.tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
             * vertex position result in a temporary so that we can modify
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vposition.tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                  emit->clip_dist_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
             * We store the clip distance in a temporary first, then
             * we'll copy it to the shadow copy and to CLIPDIST with the
             * enabled planes mask in emit_clip_distance_instructions().
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_dist_tmp_index + sem_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
            /* replace the CLIPVERTEX output register with a temporary */
            assert(emit->clip_mode == CLIP_VERTEX);
            assert(sem_index == 0);
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_vertex_tmp_index;
         }
      }
      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (sem_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output register: a 0-D, single-component
             * operand, so emit it here and return early.
             */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (index == emit->fs.color_out_index[0] &&
                  emit->fs.color_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
             * fragment color result in a temporary so that we can read
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.color_tmp_index;
         }
         else {
            /* Typically, for fragment shaders, the output register index
             * matches the color semantic index.  But not when we write to
             * the fragment depth register.  In that case, OUT[0] will be
             * fragdepth and OUT[1] will be the 0th color output.  We need
             * to use the semantic index for color outputs.
             */
            assert(sem_name == TGSI_SEMANTIC_COLOR);
            index = emit->info.output_semantic_index[index];

            emit->num_output_writes++;
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to.  Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    */
   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
   operand0.mask = writemask;

   /* translate TGSI register file type to VGPU10 operand type */
   operand0.operandType = translate_register_file(file, tempArrayId > 0);

   check_register_index(emit, operand0.operandType, index);

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   /* Emit tokens */
   emit_dword(emit, operand0.value);
   if (tempArrayId > 0) {
      /* for indexable temps, the first index token is the array ID */
      emit_dword(emit, tempArrayId);
   }

   emit_dword(emit, remap_temp_index(emit, file, index));

   if (indirect) {
      emit_indirect_register(emit, reg->Indirect.Index);
   }
}


/**
 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
 */
static void
emit_src_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_src_register *reg)
{
   enum tgsi_file_type file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   const unsigned swizzleX = reg->Register.SwizzleX;
   const unsigned swizzleY = reg->Register.SwizzleY;
   const unsigned swizzleZ = reg->Register.SwizzleZ;
   const unsigned swizzleW = reg->Register.SwizzleW;
   const unsigned absolute = reg->Register.Absolute;
   const unsigned negate = reg->Register.Negate;
   bool is_prim_id = FALSE;

   VGPU10OperandToken0 operand0;
   VGPU10OperandToken1 operand1;

   if (emit->unit == PIPE_SHADER_FRAGMENT &&
       file == TGSI_FILE_INPUT) {
      if (index == emit->fs.face_input_index) {
         /* Replace INPUT[FACE] with TEMP[FACE] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.face_tmp_index;
      }
      else if (index == emit->fs.fragcoord_input_index) {
         /* Replace INPUT[POSITION] with TEMP[POSITION] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.fragcoord_tmp_index;
      }
      else {
         /* We remap fragment shader inputs so that FS input indexes
          * match up with VS/GS output indexes.
          */
         index = emit->linkage.input_map[index];
      }
   }
   else if (emit->unit == PIPE_SHADER_GEOMETRY &&
            file == TGSI_FILE_INPUT) {
      is_prim_id = (index == emit->gs.prim_id_index);
      index = emit->linkage.input_map[index];
   }
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      if (file == TGSI_FILE_INPUT) {
         /* if input is adjusted... */
         if ((emit->key.vs.adjust_attrib_w_1 |
              emit->key.vs.adjust_attrib_itof |
              emit->key.vs.adjust_attrib_utof |
              emit->key.vs.attrib_is_bgra |
              emit->key.vs.attrib_puint_to_snorm |
              emit->key.vs.attrib_puint_to_uscaled |
              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
            /* read the adjusted attribute from its temp instead */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vs.adjusted_input[index];
         }
      }
      else if (file == TGSI_FILE_SYSTEM_VALUE) {
         assert(index < ARRAY_SIZE(emit->system_value_indexes));
         index = emit->system_value_indexes[index];
      }
   }

   operand0.value = operand1.value = 0;

   if (is_prim_id) {
      /* NOTE: we should be using VGPU10_OPERAND_1_COMPONENT here, but
       * our virtual GPU accepts this as-is.
       */
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
   }
   else {
      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
      operand0.operandType = translate_register_file(file, tempArrayId > 0);
   }

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
      /* there's no swizzle for in-line immediates */
      if (swizzleX == swizzleY &&
          swizzleX == swizzleZ &&
          swizzleX == swizzleW) {
         /* replicate a single component to all four */
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
      }
      else {
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
      }

      operand0.swizzleX = swizzleX;
      operand0.swizzleY = swizzleY;
      operand0.swizzleZ = swizzleZ;
      operand0.swizzleW = swizzleW;

      if (absolute || negate) {
         /* abs/neg modifiers require the extended operand token */
         operand0.extended = 1;
         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
         if (absolute && !negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
         if (!absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
         if (absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
      }
   }

   /* Emit the operand tokens */
   emit_dword(emit, operand0.value);
   if (operand0.extended)
      emit_dword(emit, operand1.value);

   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
      /* Emit the four float/int in-line immediate values */
      unsigned *c;
      assert(index < ARRAY_SIZE(emit->immediates));
      assert(file == TGSI_FILE_IMMEDIATE);
      assert(swizzleX < 4);
      assert(swizzleY < 4);
      assert(swizzleZ < 4);
      assert(swizzleW < 4);
      c = (unsigned *) emit->immediates[index];
      emit_dword(emit, c[swizzleX]);
      emit_dword(emit, c[swizzleY]);
      emit_dword(emit, c[swizzleZ]);
      emit_dword(emit, c[swizzleW]);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
      /* Emit the register index(es) */
      if (index2d ||
          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
         emit_dword(emit, reg->Dimension.Index);
      }

      if (tempArrayId > 0) {
         /* for indexable temps, the first index token is the array ID */
         emit_dword(emit, tempArrayId);
      }

      emit_dword(emit, remap_temp_index(emit, file, index));

      if (indirect) {
         emit_indirect_register(emit, reg->Indirect.Index);
      }
   }
}


/**
 * Emit a resource operand (for use with a SAMPLE instruction).
 */
static void
emit_resource_register(struct svga_shader_emitter_v10 *emit,
                       unsigned resource_number)
{
   VGPU10OperandToken0 operand0;

   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_Y;
   operand0.swizzleZ = VGPU10_COMPONENT_Z;
   operand0.swizzleW = VGPU10_COMPONENT_W;

   emit_dword(emit, operand0.value);
   emit_dword(emit, resource_number);
}


/**
 * Emit a sampler operand (for use with a SAMPLE instruction.
1148 */ 1149 static void 1150 emit_sampler_register(struct svga_shader_emitter_v10 *emit, 1151 unsigned sampler_number) 1152 { 1153 VGPU10OperandToken0 operand0; 1154 1155 check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number); 1156 1157 /* init */ 1158 operand0.value = 0; 1159 1160 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 1161 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1162 1163 emit_dword(emit, operand0.value); 1164 emit_dword(emit, sampler_number); 1165 } 1166 1167 1168 /** 1169 * Emit an operand which reads the IS_FRONT_FACING register. 1170 */ 1171 static void 1172 emit_face_register(struct svga_shader_emitter_v10 *emit) 1173 { 1174 VGPU10OperandToken0 operand0; 1175 unsigned index = emit->linkage.input_map[emit->fs.face_input_index]; 1176 1177 /* init */ 1178 operand0.value = 0; 1179 1180 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT; 1181 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1182 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1183 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1184 1185 operand0.swizzleX = VGPU10_COMPONENT_X; 1186 operand0.swizzleY = VGPU10_COMPONENT_X; 1187 operand0.swizzleZ = VGPU10_COMPONENT_X; 1188 operand0.swizzleW = VGPU10_COMPONENT_X; 1189 1190 emit_dword(emit, operand0.value); 1191 emit_dword(emit, index); 1192 } 1193 1194 1195 /** 1196 * Emit the token for a VGPU10 opcode. 1197 * \param saturate clamp result to [0,1]? 1198 */ 1199 static void 1200 emit_opcode(struct svga_shader_emitter_v10 *emit, 1201 unsigned vgpu10_opcode, boolean saturate) 1202 { 1203 VGPU10OpcodeToken0 token0; 1204 1205 token0.value = 0; /* init all fields to zero */ 1206 token0.opcodeType = vgpu10_opcode; 1207 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1208 token0.saturate = saturate; 1209 1210 emit_dword(emit, token0.value); 1211 } 1212 1213 1214 /** 1215 * Emit the token for a VGPU10 resinfo instruction. 
1216 * \param modifier return type modifier, _uint or _rcpFloat. 1217 * TODO: We may want to remove this parameter if it will 1218 * only ever be used as _uint. 1219 */ 1220 static void 1221 emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, 1222 VGPU10_RESINFO_RETURN_TYPE modifier) 1223 { 1224 VGPU10OpcodeToken0 token0; 1225 1226 token0.value = 0; /* init all fields to zero */ 1227 token0.opcodeType = VGPU10_OPCODE_RESINFO; 1228 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1229 token0.resinfoReturnType = modifier; 1230 1231 emit_dword(emit, token0.value); 1232 } 1233 1234 1235 /** 1236 * Emit opcode tokens for a texture sample instruction. Texture instructions 1237 * can be rather complicated (texel offsets, etc) so we have this specialized 1238 * function. 1239 */ 1240 static void 1241 emit_sample_opcode(struct svga_shader_emitter_v10 *emit, 1242 unsigned vgpu10_opcode, boolean saturate, 1243 const int offsets[3]) 1244 { 1245 VGPU10OpcodeToken0 token0; 1246 VGPU10OpcodeToken1 token1; 1247 1248 token0.value = 0; /* init all fields to zero */ 1249 token0.opcodeType = vgpu10_opcode; 1250 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 1251 token0.saturate = saturate; 1252 1253 if (offsets[0] || offsets[1] || offsets[2]) { 1254 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1255 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1256 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 1257 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1258 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1259 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 1260 1261 token0.extended = 1; 1262 token1.value = 0; 1263 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; 1264 token1.offsetU = offsets[0]; 1265 token1.offsetV = offsets[1]; 1266 token1.offsetW = offsets[2]; 1267 } 1268 1269 emit_dword(emit, token0.value); 1270 if (token0.extended) { 1271 emit_dword(emit, token1.value); 1272 } 1273 } 

/**
 * Emit a DISCARD opcode token.
 * If nonzero is set, we'll discard the fragment if the X component is not 0.
 * Otherwise, we'll discard the fragment if the X component is 0.
 */
static void
emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
{
   VGPU10OpcodeToken0 opcode0;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
   if (nonzero)
      opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;

   emit_dword(emit, opcode0.value);
}


/**
 * We need to call this before we begin emitting a VGPU10 instruction.
 */
static void
begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->inst_start_token == 0);
   /* Save location of the instruction's VGPU10OpcodeToken0 token.
    * Note, we can't save a pointer because it would become invalid if
    * we have to realloc the output buffer.
    */
   emit->inst_start_token = emit_get_num_tokens(emit);
}


/**
 * We need to call this after we emit the last token of a VGPU10 instruction.
 * This function patches in the opcode token's instructionLength field.
 */
static void
end_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
   unsigned inst_length;

   assert(emit->inst_start_token > 0);

   if (emit->discard_instruction) {
      /* Back up the emit->ptr to where this instruction started so
       * that we discard the current instruction.
       */
      emit->ptr = (char *) (tokens + emit->inst_start_token);
   }
   else {
      /* Compute instruction length and patch that into the start of
       * the instruction.
       */
      inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;

      assert(inst_length > 0);

      tokens[emit->inst_start_token].instructionLength = inst_length;
   }

   emit->inst_start_token = 0; /* reset to zero for error checking */
   emit->discard_instruction = FALSE;
}


/**
 * Return index for a free temporary register.
 */
static unsigned
get_temp_index(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
   /* internal temps live just past the shader's own temporaries */
   return emit->num_shader_temps + emit->internal_temp_count++;
}


/**
 * Release the temporaries which were generated by get_temp_index().
 */
static void
free_temp_indexes(struct svga_shader_emitter_v10 *emit)
{
   emit->internal_temp_count = 0;
}


/**
 * Create a tgsi_full_src_register.
 */
static struct tgsi_full_src_register
make_src_reg(enum tgsi_file_type file, unsigned index)
{
   struct tgsi_full_src_register reg;

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = file;
   reg.Register.Index = index;
   /* identity swizzle */
   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
   return reg;
}


/**
 * Create a tgsi_full_src_register for a temporary.
 */
static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_TEMPORARY, index);
}


/**
 * Create a tgsi_full_src_register for a constant.
 */
static struct tgsi_full_src_register
make_src_const_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_CONSTANT, index);
}


/**
 * Create a tgsi_full_src_register for an immediate constant.
1406 */ 1407 static struct tgsi_full_src_register 1408 make_src_immediate_reg(unsigned index) 1409 { 1410 return make_src_reg(TGSI_FILE_IMMEDIATE, index); 1411 } 1412 1413 1414 /** 1415 * Create a tgsi_full_dst_register. 1416 */ 1417 static struct tgsi_full_dst_register 1418 make_dst_reg(enum tgsi_file_type file, unsigned index) 1419 { 1420 struct tgsi_full_dst_register reg; 1421 1422 memset(®, 0, sizeof(reg)); 1423 reg.Register.File = file; 1424 reg.Register.Index = index; 1425 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1426 return reg; 1427 } 1428 1429 1430 /** 1431 * Create a tgsi_full_dst_register for a temporary. 1432 */ 1433 static struct tgsi_full_dst_register 1434 make_dst_temp_reg(unsigned index) 1435 { 1436 return make_dst_reg(TGSI_FILE_TEMPORARY, index); 1437 } 1438 1439 1440 /** 1441 * Create a tgsi_full_dst_register for an output. 1442 */ 1443 static struct tgsi_full_dst_register 1444 make_dst_output_reg(unsigned index) 1445 { 1446 return make_dst_reg(TGSI_FILE_OUTPUT, index); 1447 } 1448 1449 1450 /** 1451 * Create negated tgsi_full_src_register. 1452 */ 1453 static struct tgsi_full_src_register 1454 negate_src(const struct tgsi_full_src_register *reg) 1455 { 1456 struct tgsi_full_src_register neg = *reg; 1457 neg.Register.Negate = !reg->Register.Negate; 1458 return neg; 1459 } 1460 1461 /** 1462 * Create absolute value of a tgsi_full_src_register. 
1463 */ 1464 static struct tgsi_full_src_register 1465 absolute_src(const struct tgsi_full_src_register *reg) 1466 { 1467 struct tgsi_full_src_register absolute = *reg; 1468 absolute.Register.Absolute = 1; 1469 return absolute; 1470 } 1471 1472 1473 /** Return the named swizzle term from the src register */ 1474 static inline unsigned 1475 get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term) 1476 { 1477 switch (term) { 1478 case TGSI_SWIZZLE_X: 1479 return reg->Register.SwizzleX; 1480 case TGSI_SWIZZLE_Y: 1481 return reg->Register.SwizzleY; 1482 case TGSI_SWIZZLE_Z: 1483 return reg->Register.SwizzleZ; 1484 case TGSI_SWIZZLE_W: 1485 return reg->Register.SwizzleW; 1486 default: 1487 assert(!"Bad swizzle"); 1488 return TGSI_SWIZZLE_X; 1489 } 1490 } 1491 1492 1493 /** 1494 * Create swizzled tgsi_full_src_register. 1495 */ 1496 static struct tgsi_full_src_register 1497 swizzle_src(const struct tgsi_full_src_register *reg, 1498 enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY, 1499 enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW) 1500 { 1501 struct tgsi_full_src_register swizzled = *reg; 1502 /* Note: we swizzle the current swizzle */ 1503 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); 1504 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); 1505 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); 1506 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); 1507 return swizzled; 1508 } 1509 1510 1511 /** 1512 * Create swizzled tgsi_full_src_register where all the swizzle 1513 * terms are the same. 
1514 */ 1515 static struct tgsi_full_src_register 1516 scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle) 1517 { 1518 struct tgsi_full_src_register swizzled = *reg; 1519 /* Note: we swizzle the current swizzle */ 1520 swizzled.Register.SwizzleX = 1521 swizzled.Register.SwizzleY = 1522 swizzled.Register.SwizzleZ = 1523 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); 1524 return swizzled; 1525 } 1526 1527 1528 /** 1529 * Create new tgsi_full_dst_register with writemask. 1530 * \param mask bitmask of TGSI_WRITEMASK_[XYZW] 1531 */ 1532 static struct tgsi_full_dst_register 1533 writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) 1534 { 1535 struct tgsi_full_dst_register masked = *reg; 1536 masked.Register.WriteMask = mask; 1537 return masked; 1538 } 1539 1540 1541 /** 1542 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. 1543 */ 1544 static boolean 1545 same_swizzle_terms(const struct tgsi_full_src_register *reg) 1546 { 1547 return (reg->Register.SwizzleX == reg->Register.SwizzleY && 1548 reg->Register.SwizzleY == reg->Register.SwizzleZ && 1549 reg->Register.SwizzleZ == reg->Register.SwizzleW); 1550 } 1551 1552 1553 /** 1554 * Search the vector for the value 'x' and return its position. 1555 */ 1556 static int 1557 find_imm_in_vec4(const union tgsi_immediate_data vec[4], 1558 union tgsi_immediate_data x) 1559 { 1560 unsigned i; 1561 for (i = 0; i < 4; i++) { 1562 if (vec[i].Int == x.Int) 1563 return i; 1564 } 1565 return -1; 1566 } 1567 1568 1569 /** 1570 * Helper used by make_immediate_reg(), make_immediate_reg_4(). 
1571 */ 1572 static int 1573 find_immediate(struct svga_shader_emitter_v10 *emit, 1574 union tgsi_immediate_data x, unsigned startIndex) 1575 { 1576 const unsigned endIndex = emit->num_immediates; 1577 unsigned i; 1578 1579 assert(emit->immediates_emitted); 1580 1581 /* Search immediates for x, y, z, w */ 1582 for (i = startIndex; i < endIndex; i++) { 1583 if (x.Int == emit->immediates[i][0].Int || 1584 x.Int == emit->immediates[i][1].Int || 1585 x.Int == emit->immediates[i][2].Int || 1586 x.Int == emit->immediates[i][3].Int) { 1587 return i; 1588 } 1589 } 1590 /* Should never try to use an immediate value that wasn't pre-declared */ 1591 assert(!"find_immediate() failed!"); 1592 return -1; 1593 } 1594 1595 1596 /** 1597 * Return a tgsi_full_src_register for an immediate/literal 1598 * union tgsi_immediate_data[4] value. 1599 * Note: the values must have been previously declared/allocated in 1600 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same 1601 * vec4 immediate. 
1602 */ 1603 static struct tgsi_full_src_register 1604 make_immediate_reg_4(struct svga_shader_emitter_v10 *emit, 1605 const union tgsi_immediate_data imm[4]) 1606 { 1607 struct tgsi_full_src_register reg; 1608 unsigned i; 1609 1610 for (i = 0; i < emit->num_common_immediates; i++) { 1611 /* search for first component value */ 1612 int immpos = find_immediate(emit, imm[0], i); 1613 int x, y, z, w; 1614 1615 assert(immpos >= 0); 1616 1617 /* find remaining components within the immediate vector */ 1618 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]); 1619 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]); 1620 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]); 1621 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]); 1622 1623 if (x >=0 && y >= 0 && z >= 0 && w >= 0) { 1624 /* found them all */ 1625 memset(®, 0, sizeof(reg)); 1626 reg.Register.File = TGSI_FILE_IMMEDIATE; 1627 reg.Register.Index = immpos; 1628 reg.Register.SwizzleX = x; 1629 reg.Register.SwizzleY = y; 1630 reg.Register.SwizzleZ = z; 1631 reg.Register.SwizzleW = w; 1632 return reg; 1633 } 1634 /* else, keep searching */ 1635 } 1636 1637 assert(!"Failed to find immediate register!"); 1638 1639 /* Just return IMM[0].xxxx */ 1640 memset(®, 0, sizeof(reg)); 1641 reg.Register.File = TGSI_FILE_IMMEDIATE; 1642 return reg; 1643 } 1644 1645 1646 /** 1647 * Return a tgsi_full_src_register for an immediate/literal 1648 * union tgsi_immediate_data value of the form {value, value, value, value}. 1649 * \sa make_immediate_reg_4() regarding allowed values. 
1650 */ 1651 static struct tgsi_full_src_register 1652 make_immediate_reg(struct svga_shader_emitter_v10 *emit, 1653 union tgsi_immediate_data value) 1654 { 1655 struct tgsi_full_src_register reg; 1656 int immpos = find_immediate(emit, value, 0); 1657 1658 assert(immpos >= 0); 1659 1660 memset(®, 0, sizeof(reg)); 1661 reg.Register.File = TGSI_FILE_IMMEDIATE; 1662 reg.Register.Index = immpos; 1663 reg.Register.SwizzleX = 1664 reg.Register.SwizzleY = 1665 reg.Register.SwizzleZ = 1666 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value); 1667 1668 return reg; 1669 } 1670 1671 1672 /** 1673 * Return a tgsi_full_src_register for an immediate/literal float[4] value. 1674 * \sa make_immediate_reg_4() regarding allowed values. 1675 */ 1676 static struct tgsi_full_src_register 1677 make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit, 1678 float x, float y, float z, float w) 1679 { 1680 union tgsi_immediate_data imm[4]; 1681 imm[0].Float = x; 1682 imm[1].Float = y; 1683 imm[2].Float = z; 1684 imm[3].Float = w; 1685 return make_immediate_reg_4(emit, imm); 1686 } 1687 1688 1689 /** 1690 * Return a tgsi_full_src_register for an immediate/literal float value 1691 * of the form {value, value, value, value}. 1692 * \sa make_immediate_reg_4() regarding allowed values. 1693 */ 1694 static struct tgsi_full_src_register 1695 make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) 1696 { 1697 union tgsi_immediate_data imm; 1698 imm.Float = value; 1699 return make_immediate_reg(emit, imm); 1700 } 1701 1702 1703 /** 1704 * Return a tgsi_full_src_register for an immediate/literal int[4] vector. 
1705 */ 1706 static struct tgsi_full_src_register 1707 make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, 1708 int x, int y, int z, int w) 1709 { 1710 union tgsi_immediate_data imm[4]; 1711 imm[0].Int = x; 1712 imm[1].Int = y; 1713 imm[2].Int = z; 1714 imm[3].Int = w; 1715 return make_immediate_reg_4(emit, imm); 1716 } 1717 1718 1719 /** 1720 * Return a tgsi_full_src_register for an immediate/literal int value 1721 * of the form {value, value, value, value}. 1722 * \sa make_immediate_reg_4() regarding allowed values. 1723 */ 1724 static struct tgsi_full_src_register 1725 make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) 1726 { 1727 union tgsi_immediate_data imm; 1728 imm.Int = value; 1729 return make_immediate_reg(emit, imm); 1730 } 1731 1732 1733 /** 1734 * Allocate space for a union tgsi_immediate_data[4] immediate. 1735 * \return the index/position of the immediate. 1736 */ 1737 static unsigned 1738 alloc_immediate_4(struct svga_shader_emitter_v10 *emit, 1739 const union tgsi_immediate_data imm[4]) 1740 { 1741 unsigned n = emit->num_immediates++; 1742 assert(!emit->immediates_emitted); 1743 assert(n < ARRAY_SIZE(emit->immediates)); 1744 emit->immediates[n][0] = imm[0]; 1745 emit->immediates[n][1] = imm[1]; 1746 emit->immediates[n][2] = imm[2]; 1747 emit->immediates[n][3] = imm[3]; 1748 return n; 1749 } 1750 1751 1752 /** 1753 * Allocate space for a float[4] immediate. 1754 * \return the index/position of the immediate. 1755 */ 1756 static unsigned 1757 alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, 1758 float x, float y, float z, float w) 1759 { 1760 union tgsi_immediate_data imm[4]; 1761 imm[0].Float = x; 1762 imm[1].Float = y; 1763 imm[2].Float = z; 1764 imm[3].Float = w; 1765 return alloc_immediate_4(emit, imm); 1766 } 1767 1768 1769 /** 1770 * Allocate space for an int[4] immediate. 1771 * \return the index/position of the immediate. 
1772 */ 1773 static unsigned 1774 alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, 1775 int x, int y, int z, int w) 1776 { 1777 union tgsi_immediate_data imm[4]; 1778 imm[0].Int = x; 1779 imm[1].Int = y; 1780 imm[2].Int = z; 1781 imm[3].Int = w; 1782 return alloc_immediate_4(emit, imm); 1783 } 1784 1785 1786 /** 1787 * Allocate a shader input to store a system value. 1788 */ 1789 static unsigned 1790 alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) 1791 { 1792 const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index; 1793 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1794 emit->system_value_indexes[index] = n; 1795 return n; 1796 } 1797 1798 1799 /** 1800 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. 1801 */ 1802 static boolean 1803 emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, 1804 const struct tgsi_full_immediate *imm) 1805 { 1806 /* We don't actually emit any code here. We just save the 1807 * immediate values and emit them later. 1808 */ 1809 alloc_immediate_4(emit, imm->u); 1810 return TRUE; 1811 } 1812 1813 1814 /** 1815 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block 1816 * containing all the immediate values previously allocated 1817 * with alloc_immediate_4(). 
1818 */ 1819 static boolean 1820 emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) 1821 { 1822 VGPU10OpcodeToken0 token; 1823 1824 assert(!emit->immediates_emitted); 1825 1826 token.value = 0; 1827 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; 1828 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; 1829 1830 /* Note: no begin/end_emit_instruction() calls */ 1831 emit_dword(emit, token.value); 1832 emit_dword(emit, 2 + 4 * emit->num_immediates); 1833 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); 1834 1835 emit->immediates_emitted = TRUE; 1836 1837 return TRUE; 1838 } 1839 1840 1841 /** 1842 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 1843 * interpolation mode. 1844 * \return a VGPU10_INTERPOLATION_x value 1845 */ 1846 static unsigned 1847 translate_interpolation(const struct svga_shader_emitter_v10 *emit, 1848 enum tgsi_interpolate_mode interp, 1849 enum tgsi_interpolate_loc interpolate_loc) 1850 { 1851 if (interp == TGSI_INTERPOLATE_COLOR) { 1852 interp = emit->key.fs.flatshade ? 1853 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; 1854 } 1855 1856 switch (interp) { 1857 case TGSI_INTERPOLATE_CONSTANT: 1858 return VGPU10_INTERPOLATION_CONSTANT; 1859 case TGSI_INTERPOLATE_LINEAR: 1860 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 1861 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID : 1862 VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; 1863 case TGSI_INTERPOLATE_PERSPECTIVE: 1864 return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ? 1865 VGPU10_INTERPOLATION_LINEAR_CENTROID : 1866 VGPU10_INTERPOLATION_LINEAR; 1867 default: 1868 assert(!"Unexpected interpolation mode"); 1869 return VGPU10_INTERPOLATION_CONSTANT; 1870 } 1871 } 1872 1873 1874 /** 1875 * Translate a TGSI property to VGPU10. 1876 * Don't emit any instructions yet, only need to gather the primitive property 1877 * information. The output primitive topology might be changed later. 
The 1878 * final property instructions will be emitted as part of the pre-helper code. 1879 */ 1880 static boolean 1881 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, 1882 const struct tgsi_full_property *prop) 1883 { 1884 static const VGPU10_PRIMITIVE primType[] = { 1885 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ 1886 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ 1887 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ 1888 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ 1889 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ 1890 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ 1891 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ 1892 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ 1893 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 1894 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ 1895 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 1896 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 1897 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 1898 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 1899 }; 1900 1901 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { 1902 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ 1903 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ 1904 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ 1905 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ 1906 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ 1907 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ 1908 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ 1909 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ 1910 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 1911 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ 1912 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 1913 
VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 1914 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 1915 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 1916 }; 1917 1918 static const unsigned inputArraySize[] = { 1919 0, /* VGPU10_PRIMITIVE_UNDEFINED */ 1920 1, /* VGPU10_PRIMITIVE_POINT */ 1921 2, /* VGPU10_PRIMITIVE_LINE */ 1922 3, /* VGPU10_PRIMITIVE_TRIANGLE */ 1923 0, 1924 0, 1925 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ 1926 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ 1927 }; 1928 1929 switch (prop->Property.PropertyName) { 1930 case TGSI_PROPERTY_GS_INPUT_PRIM: 1931 assert(prop->u[0].Data < ARRAY_SIZE(primType)); 1932 emit->gs.prim_type = primType[prop->u[0].Data]; 1933 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); 1934 emit->gs.input_size = inputArraySize[emit->gs.prim_type]; 1935 break; 1936 1937 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 1938 assert(prop->u[0].Data < ARRAY_SIZE(primTopology)); 1939 emit->gs.prim_topology = primTopology[prop->u[0].Data]; 1940 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); 1941 break; 1942 1943 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 1944 emit->gs.max_out_vertices = prop->u[0].Data; 1945 break; 1946 1947 default: 1948 break; 1949 } 1950 1951 return TRUE; 1952 } 1953 1954 1955 static void 1956 emit_property_instruction(struct svga_shader_emitter_v10 *emit, 1957 VGPU10OpcodeToken0 opcode0, unsigned nData, 1958 unsigned data) 1959 { 1960 begin_emit_instruction(emit); 1961 emit_dword(emit, opcode0.value); 1962 if (nData) 1963 emit_dword(emit, data); 1964 end_emit_instruction(emit); 1965 } 1966 1967 1968 /** 1969 * Emit property instructions 1970 */ 1971 static void 1972 emit_property_instructions(struct svga_shader_emitter_v10 *emit) 1973 { 1974 VGPU10OpcodeToken0 opcode0; 1975 1976 assert(emit->unit == PIPE_SHADER_GEOMETRY); 1977 1978 /* emit input primitive type declaration */ 1979 opcode0.value = 0; 1980 
   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE;
   opcode0.primitive = emit->gs.prim_type;
   emit_property_instruction(emit, opcode0, 0, 0);

   /* emit output primitive topology declaration */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY;
   opcode0.primitiveTopology = emit->gs.prim_topology;
   emit_property_instruction(emit, opcode0, 0, 0);

   /* emit max output vertices */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT;
   emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices);
}


/**
 * Emit a vgpu10 declaration "instruction".
 * The token layout is: opcode token, operand token, then (depending on
 * the operand's index dimension) an optional size token and register
 * index token, then an optional system-value name token.
 * \param opcode0  the opcode token (must have a non-zero opcodeType)
 * \param operand0  the operand token (must have a non-zero mask)
 * \param name_token  system-value name token; only emitted when non-zero
 * \param index  the register index
 * \param size  array size of the operand.  In most cases, it is 1,
 *              but for inputs to geometry shader, the array size varies
 *              depending on the primitive type.
 */
static void
emit_decl_instruction(struct svga_shader_emitter_v10 *emit,
                      VGPU10OpcodeToken0 opcode0,
                      VGPU10OperandToken0 operand0,
                      VGPU10NameToken name_token,
                      unsigned index, unsigned size)
{
   assert(opcode0.opcodeType);
   assert(operand0.mask);

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);

   emit_dword(emit, operand0.value);

   if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) {
      /* Next token is the index of the register to declare */
      emit_dword(emit, index);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) {
      /* Next token is the size of the register */
      emit_dword(emit, size);

      /* Followed by the index of the register */
      emit_dword(emit, index);
   }

   if (name_token.value) {
      emit_dword(emit, name_token.value);
   }

   end_emit_instruction(emit);
}


/**
 * Emit the declaration for a shader input.
 * \param opcodeType  opcode type, one of VGPU10_OPCODE_DCL_INPUTx
 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x
 * \param dim  index dimension
 * \param index  the input register index
 * \param size  array size of the operand.  In most cases, it is 1,
 *              but for inputs to geometry shader, the array size varies
 *              depending on the primitive type.
 * \param name  one of VGPU10_NAME_x
 * \param numComp  number of components
 * \param selMode  component selection mode
 * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 * \param interpMode  interpolation mode
 */
static void
emit_input_declaration(struct svga_shader_emitter_v10 *emit,
                       unsigned opcodeType, unsigned operandType,
                       unsigned dim, unsigned index, unsigned size,
                       unsigned name, unsigned numComp,
                       unsigned selMode, unsigned usageMask,
                       unsigned interpMode)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   /* Sanity-check every parameter against the legal token values before
    * packing them into the declaration tokens.
    */
   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID);
   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
   assert(dim <= VGPU10_OPERAND_INDEX_3D);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_INSTANCE_ID ||
          name == VGPU10_NAME_VERTEX_ID ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_IS_FRONT_FACE);
   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
          interpMode == VGPU10_INTERPOLATION_LINEAR ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID);

   check_register_index(emit, opcodeType, index);

   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = opcodeType;
   opcode0.interpolationMode = interpMode;

   operand0.operandType = operandType;
   operand0.numComponents = numComp;
   operand0.selectionMode = selMode;
   operand0.mask = usageMask;
   operand0.indexDimension = dim;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   /* 2D operands (e.g. GS per-vertex inputs) carry a second index */
   if (dim == VGPU10_OPERAND_INDEX_2D)
      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   name_token.name = name;

   emit_decl_instruction(emit, opcode0, operand0, name_token, index, size);
}


/**
 * Emit the declaration for a shader output.
 * \param type  one of VGPU10_OPCODE_DCL_OUTPUTx
 * \param index  the output register index
 * \param name  one of VGPU10_NAME_x
 * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 */
static void
emit_output_declaration(struct svga_shader_emitter_v10 *emit,
                        unsigned type, unsigned index,
                        unsigned name, unsigned usageMask)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(type == VGPU10_OPCODE_DCL_OUTPUT ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SGV ||
          type == VGPU10_OPCODE_DCL_OUTPUT_SIV);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
          name == VGPU10_NAME_CLIP_DISTANCE);

   check_register_index(emit, type, index);

   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = type;
   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
   operand0.mask = usageMask;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   name_token.name = name;

   /* outputs are always declared with array size 1 */
   emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1);
}


/**
 * Emit the declaration for the fragment depth output.
 */
static void
emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT;
   operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
   operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
   /* the depth output has no register index (0D) */
   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
   /* mask must be non-zero to satisfy the assertion in
    * emit_decl_instruction(), even though the operand is 1-component
    */
   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;

   emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1);
}


/**
 * Emit the declaration for a system value input/output.
 */
static void
emit_system_value_declaration(struct svga_shader_emitter_v10 *emit,
                              enum tgsi_semantic semantic_name, unsigned index)
{
   switch (semantic_name) {
   case TGSI_SEMANTIC_INSTANCEID:
      index = alloc_system_value_index(emit, index);
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
                             VGPU10_OPERAND_TYPE_INPUT,
                             VGPU10_OPERAND_INDEX_1D,
                             index, 1,
                             VGPU10_NAME_INSTANCE_ID,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
                             VGPU10_INTERPOLATION_UNDEFINED);
      break;
   case TGSI_SEMANTIC_VERTEXID:
      index = alloc_system_value_index(emit, index);
      emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV,
                             VGPU10_OPERAND_TYPE_INPUT,
                             VGPU10_OPERAND_INDEX_1D,
                             index, 1,
                             VGPU10_NAME_VERTEX_ID,
                             VGPU10_OPERAND_4_COMPONENT,
                             VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                             VGPU10_OPERAND_4_COMPONENT_MASK_X,
                             VGPU10_INTERPOLATION_UNDEFINED);
      break;
   default:
      /* XXX other system values are silently ignored here; no
       * declaration is emitted for them.
       */
      ;
   }
}

/**
 * Translate a TGSI declaration to VGPU10.
 */
static boolean
emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit,
                        const struct tgsi_full_declaration *decl)
{
   switch (decl->Declaration.File) {
   case TGSI_FILE_INPUT:
      /* do nothing - see emit_input_declarations() */
      return TRUE;

   case TGSI_FILE_OUTPUT:
      assert(decl->Range.First == decl->Range.Last);
      emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask;
      return TRUE;

   case TGSI_FILE_TEMPORARY:
      /* Don't declare the temps here.  Just keep track of how many
       * and emit the declaration later.
       */
      if (decl->Declaration.Array) {
         /* Indexed temporary array.  Save the start index of the array
          * and the size of the array.
          */
         /* NOTE(review): this clamps to MAX_TEMP_ARRAYS inclusive, but the
          * assert below requires arrayID < ARRAY_SIZE(emit->temp_arrays).
          * If temp_arrays has exactly MAX_TEMP_ARRAYS entries, an ArrayID
          * >= MAX_TEMP_ARRAYS would trip the assert (and index out of
          * bounds in release builds) — confirm against the declaration of
          * temp_arrays; MAX_TEMP_ARRAYS - 1 may be the intended clamp.
          */
         const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS);
         unsigned i;

         assert(arrayID < ARRAY_SIZE(emit->temp_arrays));

         /* Save this array so we can emit the declaration for it later */
         emit->temp_arrays[arrayID].start = decl->Range.First;
         emit->temp_arrays[arrayID].size =
            decl->Range.Last - decl->Range.First + 1;

         emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1);
         assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS);
         emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS);

         /* Fill in the temp_map entries for this array */
         for (i = decl->Range.First; i <= decl->Range.Last; i++) {
            emit->temp_map[i].arrayId = arrayID;
            emit->temp_map[i].index = i - decl->Range.First;
         }
      }

      /* for all temps, indexed or not, keep track of highest index */
      emit->num_shader_temps = MAX2(emit->num_shader_temps,
                                    decl->Range.Last + 1);
      return TRUE;

   case TGSI_FILE_CONSTANT:
      /* Don't declare constants here.  Just keep track and emit later.
       */
      {
         unsigned constbuf = 0, num_consts;
         if (decl->Declaration.Dimension) {
            constbuf = decl->Dim.Index2D;
         }
         /* We throw an assertion here when, in fact, the shader should never
          * have linked due to constbuf index out of bounds, so we shouldn't
          * have reached here.
          */
         assert(constbuf < ARRAY_SIZE(emit->num_shader_consts));

         num_consts = MAX2(emit->num_shader_consts[constbuf],
                           decl->Range.Last + 1);

         if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
            debug_printf("Warning: constant buffer is declared to size [%u]"
                         " but [%u] is the limit.\n",
                         num_consts,
                         VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
         }
         /* The linker doesn't enforce the max UBO size so we clamp here */
         emit->num_shader_consts[constbuf] =
            MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT);
      }
      return TRUE;

   case TGSI_FILE_IMMEDIATE:
      assert(!"TGSI_FILE_IMMEDIATE not handled yet!");
      return FALSE;

   case TGSI_FILE_SYSTEM_VALUE:
      emit_system_value_declaration(emit, decl->Semantic.Name,
                                    decl->Range.First);
      return TRUE;

   case TGSI_FILE_SAMPLER:
      /* Don't declare samplers here.  Just keep track and emit later.
       */
      emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1);
      return TRUE;

#if 0
   case TGSI_FILE_RESOURCE:
      /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/
      /* XXX more, VGPU10_RETURN_TYPE_FLOAT */
      assert(!"TGSI_FILE_RESOURCE not handled yet");
      return FALSE;
#endif

   case TGSI_FILE_ADDRESS:
      /* Address registers are later mapped onto temp registers; see
       * emit_temporaries_declaration().
       */
      emit->num_address_regs = MAX2(emit->num_address_regs,
                                    decl->Range.Last + 1);
      return TRUE;

   case TGSI_FILE_SAMPLER_VIEW:
      {
         unsigned unit = decl->Range.First;
         assert(decl->Range.First == decl->Range.Last);
         emit->sampler_target[unit] = decl->SamplerView.Resource;
         /* Note: we can ignore YZW return types for now */
         emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX;
         emit->sampler_view[unit] = TRUE;
      }
      return TRUE;

   default:
      assert(!"Unexpected type of declaration");
      return FALSE;
   }
}



/**
 * Emit all input declarations.
 */
static boolean
emit_input_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   /* Note: the FS path iterates over linkage.num_inputs while the GS/VS
    * paths iterate over the TGSI shader-info input counts.
    */
   if (emit->unit == PIPE_SHADER_FRAGMENT) {

      for (i = 0; i < emit->linkage.num_inputs; i++) {
         enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
         unsigned usage_mask = emit->info.input_usage_mask[i];
         unsigned index = emit->linkage.input_map[i];
         unsigned type, interpolationMode, name;

         if (usage_mask == 0)
            continue;  /* register is not actually used */

         if (semantic_name == TGSI_SEMANTIC_POSITION) {
            /* fragment position input */
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_LINEAR;
            name = VGPU10_NAME_POSITION;
            if (usage_mask & TGSI_WRITEMASK_W) {
               /* we need to replace use of 'w' with '1/w' */
               emit->fs.fragcoord_input_index = i;
            }
         }
         else if (semantic_name == TGSI_SEMANTIC_FACE) {
            /* fragment front-facing input */
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
            name = VGPU10_NAME_IS_FRONT_FACE;
            emit->fs.face_input_index = i;
         }
         else if (semantic_name == TGSI_SEMANTIC_PRIMID) {
            /* primitive ID */
            type = VGPU10_OPCODE_DCL_INPUT_PS_SGV;
            interpolationMode = VGPU10_INTERPOLATION_CONSTANT;
            name = VGPU10_NAME_PRIMITIVE_ID;
         }
         else {
            /* general fragment input */
            type = VGPU10_OPCODE_DCL_INPUT_PS;
            interpolationMode =
               translate_interpolation(emit,
                                       emit->info.input_interpolate[i],
                                       emit->info.input_interpolate_loc[i]);

            /* keeps track if flat interpolation mode is being used */
            emit->uses_flat_interp = emit->uses_flat_interp ||
               (interpolationMode == VGPU10_INTERPOLATION_CONSTANT);

            name = VGPU10_NAME_UNDEFINED;
         }

         emit_input_declaration(emit, type,
                                VGPU10_OPERAND_TYPE_INPUT,
                                VGPU10_OPERAND_INDEX_1D, index, 1,
                                name,
                                VGPU10_OPERAND_4_COMPONENT,
                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                interpolationMode);
      }
   }
   else if (emit->unit == PIPE_SHADER_GEOMETRY) {

      for (i = 0; i < emit->info.num_inputs; i++) {
         enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i];
         unsigned usage_mask = emit->info.input_usage_mask[i];
         unsigned index = emit->linkage.input_map[i];
         unsigned opcodeType, operandType;
         unsigned numComp, selMode;
         unsigned name;
         unsigned dim;

         if (usage_mask == 0)
            continue;  /* register is not actually used */

         opcodeType = VGPU10_OPCODE_DCL_INPUT;
         operandType = VGPU10_OPERAND_TYPE_INPUT;
         numComp = VGPU10_OPERAND_4_COMPONENT;
         selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;
         name = VGPU10_NAME_UNDEFINED;

         /* all geometry shader inputs are two dimensional except
          * gl_PrimitiveID
          */
         dim = VGPU10_OPERAND_INDEX_2D;

         if (semantic_name == TGSI_SEMANTIC_PRIMID) {
            /* Primitive ID */
            operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
            dim = VGPU10_OPERAND_INDEX_0D;
            numComp = VGPU10_OPERAND_0_COMPONENT;
            selMode = 0;

            /* also save the register index so we can check for
             * primitive id when emit src register. We need to modify the
             * operand type, index dimension when emit primitive id src reg.
             */
            emit->gs.prim_id_index = i;
         }
         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
            /* vertex position input */
            opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV;
            name = VGPU10_NAME_POSITION;
         }

         emit_input_declaration(emit, opcodeType, operandType,
                                dim, index,
                                emit->gs.input_size,
                                name,
                                numComp, selMode,
                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                VGPU10_INTERPOLATION_UNDEFINED);
      }
   }
   else {
      assert(emit->unit == PIPE_SHADER_VERTEX);

      for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) {
         unsigned usage_mask = emit->info.input_usage_mask[i];
         unsigned index = i;

         if (usage_mask == 0)
            continue;  /* register is not actually used */

         emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT,
                                VGPU10_OPERAND_TYPE_INPUT,
                                VGPU10_OPERAND_INDEX_1D, index, 1,
                                VGPU10_NAME_UNDEFINED,
                                VGPU10_OPERAND_4_COMPONENT,
                                VGPU10_OPERAND_4_COMPONENT_MASK_MODE,
                                VGPU10_OPERAND_4_COMPONENT_MASK_ALL,
                                VGPU10_INTERPOLATION_UNDEFINED);
      }
   }

   return TRUE;
}


/**
 * Emit all output declarations.
 */
static boolean
emit_output_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   for (i = 0; i < emit->info.num_outputs; i++) {
      /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/
      const enum tgsi_semantic semantic_name =
         emit->info.output_semantic_name[i];
      const unsigned semantic_index = emit->info.output_semantic_index[i];
      unsigned index = i;

      if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (semantic_name == TGSI_SEMANTIC_COLOR) {
            assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index));

            emit->fs.color_out_index[semantic_index] = index;

            emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs,
                                              index + 1);

            /* The semantic index is the shader's color output/buffer index */
            emit_output_declaration(emit,
                                    VGPU10_OPCODE_DCL_OUTPUT, semantic_index,
                                    VGPU10_NAME_UNDEFINED,
                                    VGPU10_OPERAND_4_COMPONENT_MASK_ALL);

            if (semantic_index == 0) {
               if (emit->key.fs.write_color0_to_n_cbufs > 1) {
                  /* Emit declarations for the additional color outputs
                   * for broadcasting.
                   */
                  unsigned j;
                  for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) {
                     /* Allocate a new output index */
                     unsigned idx = emit->info.num_outputs + j - 1;
                     emit->fs.color_out_index[j] = idx;
                     emit_output_declaration(emit,
                                        VGPU10_OPCODE_DCL_OUTPUT, idx,
                                        VGPU10_NAME_UNDEFINED,
                                        VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
                     emit->info.output_semantic_index[idx] = j;
                  }

                  emit->fs.num_color_outputs =
                     emit->key.fs.write_color0_to_n_cbufs;
               }
            }
            else {
               assert(!emit->key.fs.write_color0_to_n_cbufs);
            }
         }
         else if (semantic_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output (TGSI uses the POSITION semantic for
             * the FS depth result)
             */
            emit_fragdepth_output_declaration(emit);
         }
         else {
            assert(!"Bad output semantic name");
         }
      }
      else {
         /* VS or GS */
         unsigned name, type;
         unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;

         switch (semantic_name) {
         case TGSI_SEMANTIC_POSITION:
            assert(emit->unit != PIPE_SHADER_FRAGMENT);
            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
            name = VGPU10_NAME_POSITION;
            /* Save the index of the vertex position output register */
            emit->vposition.out_index = index;
            break;
         case TGSI_SEMANTIC_CLIPDIST:
            type = VGPU10_OPCODE_DCL_OUTPUT_SIV;
            name = VGPU10_NAME_CLIP_DISTANCE;
            /* save the starting index of the clip distance output register */
            if (semantic_index == 0)
               emit->clip_dist_out_index = index;
            writemask = emit->output_usage_mask[index];
            writemask = apply_clip_plane_mask(emit, writemask, semantic_index);
            if (writemask == 0x0) {
               continue;  /* discard this do-nothing declaration */
            }
            break;
         case TGSI_SEMANTIC_PRIMID:
            assert(emit->unit == PIPE_SHADER_GEOMETRY);
            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
            name = VGPU10_NAME_PRIMITIVE_ID;
            break;
         case TGSI_SEMANTIC_LAYER:
            assert(emit->unit == PIPE_SHADER_GEOMETRY);
            type = VGPU10_OPCODE_DCL_OUTPUT_SGV;
            name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX;
            break;
         case TGSI_SEMANTIC_CLIPVERTEX:
            type = VGPU10_OPCODE_DCL_OUTPUT;
            name = VGPU10_NAME_UNDEFINED;
            emit->clip_vertex_out_index = index;
            break;
         default:
            /* generic output */
            type = VGPU10_OPCODE_DCL_OUTPUT;
            name = VGPU10_NAME_UNDEFINED;
         }

         emit_output_declaration(emit, type, index, name, writemask);
      }
   }

   if (emit->vposition.so_index != INVALID_INDEX &&
       emit->vposition.out_index != INVALID_INDEX) {

      assert(emit->unit != PIPE_SHADER_FRAGMENT);

      /* Emit the declaration for the non-adjusted vertex position
       * for stream output purpose
       */
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
                              emit->vposition.so_index,
                              VGPU10_NAME_UNDEFINED,
                              VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   }

   if (emit->clip_dist_so_index != INVALID_INDEX &&
       emit->clip_dist_out_index != INVALID_INDEX) {

      assert(emit->unit != PIPE_SHADER_FRAGMENT);

      /* Emit the declaration for the clip distance shadow copy which
       * will be used for stream output purpose and for clip distance
       * varying variable
       */
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
                              emit->clip_dist_so_index,
                              VGPU10_NAME_UNDEFINED,
                              emit->output_usage_mask[emit->clip_dist_out_index]);

      if (emit->info.num_written_clipdistance > 4) {
         /* for the second clip distance register, each handles 4 planes */
         emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT,
                                 emit->clip_dist_so_index + 1,
                                 VGPU10_NAME_UNDEFINED,
                                 emit->output_usage_mask[emit->clip_dist_out_index+1]);
      }
   }

   return TRUE;
}


/**
 * Emit the declaration for the temporary registers.
 */
static boolean
emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit)
{
   unsigned total_temps, reg, i;

   total_temps = emit->num_shader_temps;

   /* If there is indirect access to non-indexable temps in the shader,
    * convert those temps to indexable temps. This works around a bug
    * in the GLSL->TGSI translator exposed in piglit test
    * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test.
    * Internal temps added by the driver remain as non-indexable temps.
    */
   if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) &&
       emit->num_temp_arrays == 0) {
      unsigned arrayID;

      /* array 0 is reserved for non-indexed temps, so use array 1 */
      arrayID = 1;
      emit->num_temp_arrays = arrayID + 1;
      emit->temp_arrays[arrayID].start = 0;
      emit->temp_arrays[arrayID].size = total_temps;

      /* Fill in the temp_map entries for this temp array */
      for (i = 0; i < total_temps; i++) {
         emit->temp_map[i].arrayId = arrayID;
         emit->temp_map[i].index = i;
      }
   }

   /* Allocate extra temps for specially-implemented instructions,
    * such as LIT.
    */
   total_temps += MAX_INTERNAL_TEMPS;

   if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) {
      if (emit->vposition.need_prescale || emit->key.vs.undo_viewport ||
          emit->key.clip_plane_enable ||
          emit->vposition.so_index != INVALID_INDEX) {
         emit->vposition.tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->unit == PIPE_SHADER_VERTEX) {
         /* one extra temp per vertex attribute that needs adjustment */
         unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 |
                                 emit->key.vs.adjust_attrib_itof |
                                 emit->key.vs.adjust_attrib_utof |
                                 emit->key.vs.attrib_is_bgra |
                                 emit->key.vs.attrib_puint_to_snorm |
                                 emit->key.vs.attrib_puint_to_uscaled |
                                 emit->key.vs.attrib_puint_to_sscaled);
         while (attrib_mask) {
            unsigned index = u_bit_scan(&attrib_mask);
            emit->vs.adjusted_input[index] = total_temps++;
         }
      }

      if (emit->clip_mode == CLIP_DISTANCE) {
         /* We need to write the clip distance to a temporary register
          * first. Then it will be copied to the shadow copy for
          * the clip distance varying variable and stream output purpose.
          * It will also be copied to the actual CLIPDIST register
          * according to the enabled clip planes
          */
         emit->clip_dist_tmp_index = total_temps++;
         if (emit->info.num_written_clipdistance > 4)
            total_temps++; /* second clip register */
      }
      else if (emit->clip_mode == CLIP_VERTEX) {
         /* We need to convert the TGSI CLIPVERTEX output to one or more
          * clip distances.  Allocate a temp reg for the clipvertex here.
          */
         assert(emit->info.writes_clipvertex > 0);
         emit->clip_vertex_tmp_index = total_temps;
         total_temps++;
      }
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS ||
          emit->key.fs.write_color0_to_n_cbufs > 1) {
         /* Allocate a temp to hold the output color */
         emit->fs.color_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.face_input_index != INVALID_INDEX) {
         /* Allocate a temp for the +/-1 face register */
         emit->fs.face_tmp_index = total_temps;
         total_temps += 1;
      }

      if (emit->fs.fragcoord_input_index != INVALID_INDEX) {
         /* Allocate a temp for modified fragment position register */
         emit->fs.fragcoord_tmp_index = total_temps;
         total_temps += 1;
      }
   }

   /* TGSI address registers are implemented as ordinary temps */
   for (i = 0; i < emit->num_address_regs; i++) {
      emit->address_reg_index[i] = total_temps++;
   }

   /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10
    * temp indexes.  Basically, we compact all the non-array temp register
    * indexes into a consecutive series.
    *
    * Before, we may have some TGSI declarations like:
    *   DCL TEMP[0..1], LOCAL
    *   DCL TEMP[2..4], ARRAY(1), LOCAL
    *   DCL TEMP[5..7], ARRAY(2), LOCAL
    *   plus, some extra temps, like TEMP[8], TEMP[9] for misc things
    *
    * After, we'll have a map like this:
    *   temp_map[0] = { array 0, index 0 }
    *   temp_map[1] = { array 0, index 1 }
    *   temp_map[2] = { array 1, index 0 }
    *   temp_map[3] = { array 1, index 1 }
    *   temp_map[4] = { array 1, index 2 }
    *   temp_map[5] = { array 2, index 0 }
    *   temp_map[6] = { array 2, index 1 }
    *   temp_map[7] = { array 2, index 2 }
    *   temp_map[8] = { array 0, index 2 }
    *   temp_map[9] = { array 0, index 3 }
    *
    * We'll declare two arrays of 3 elements, plus a set of four non-indexed
    * temps numbered 0..3
    *
    * Any time we emit a temporary register index, we'll have to use the
    * temp_map[] table to convert the TGSI index to the VGPU10 index.
    *
    * Finally, we recompute the total_temps value here.
    */
   reg = 0;
   for (i = 0; i < total_temps; i++) {
      if (emit->temp_map[i].arrayId == 0) {
         emit->temp_map[i].index = reg++;
      }
   }

   /* flip to 1 for debug output of the temp mapping */
   if (0) {
      debug_printf("total_temps %u\n", total_temps);
      for (i = 0; i < total_temps; i++) {
         debug_printf("temp %u -> array %u  index %u\n",
                      i, emit->temp_map[i].arrayId, emit->temp_map[i].index);
      }
   }

   total_temps = reg;

   /* Emit declaration of ordinary temp registers */
   if (total_temps > 0) {
      VGPU10OpcodeToken0 opcode0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, total_temps);
      end_emit_instruction(emit);
   }

   /* Emit declarations for indexable temp arrays.  Skip 0th entry since
    * it's unused.
    */
   for (i = 1; i < emit->num_temp_arrays; i++) {
      unsigned num_temps = emit->temp_arrays[i].size;

      if (num_temps > 0) {
         VGPU10OpcodeToken0 opcode0;

         opcode0.value = 0;
         opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP;

         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, i); /* which array */
         emit_dword(emit, num_temps);
         emit_dword(emit, 4); /* num components */
         end_emit_instruction(emit);

         total_temps += num_temps;
      }
   }

   /* Check that the grand total of all regular and indexed temps is
    * under the limit.
    */
   check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1);

   return TRUE;
}


/**
 * Emit the declaration(s) for the constant buffer(s) and allocate
 * constant slots for driver-internal "extra" constants.
 */
static boolean
emit_constant_declaration(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   unsigned total_consts, i;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER;
   opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED;
   /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */

   operand0.value = 0;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_2D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   /* identity swizzle XYZW */
   operand0.swizzleX = 0;
   operand0.swizzleY = 1;
   operand0.swizzleZ = 2;
   operand0.swizzleW = 3;

   /**
    * Emit declaration for constant buffer [0].  We also allocate
    * room for the extra constants here.
    */
   total_consts = emit->num_shader_consts[0];

   /* Now, allocate constant slots for the "extra" constants.
    * Note: it's critical that these extra constant locations
    * exactly match what's emitted by the "extra" constants code
    * in svga_state_constants.c
    */

   /* Vertex position scale/translation */
   if (emit->vposition.need_prescale) {
      emit->vposition.prescale_scale_index = total_consts++;
      emit->vposition.prescale_trans_index = total_consts++;
   }

   if (emit->unit == PIPE_SHADER_VERTEX) {
      if (emit->key.vs.undo_viewport) {
         emit->vs.viewport_index = total_consts++;
      }
   }

   /* user-defined clip planes */
   if (emit->key.clip_plane_enable) {
      unsigned n = util_bitcount(emit->key.clip_plane_enable);
      assert(emit->unit == PIPE_SHADER_VERTEX ||
             emit->unit == PIPE_SHADER_GEOMETRY);
      for (i = 0; i < n; i++) {
         emit->clip_plane_const[i] = total_consts++;
      }
   }

   for (i = 0; i < emit->num_samplers; i++) {

      if (emit->sampler_view[i]) {

         /* Texcoord scale factors for RECT textures */
         if (emit->key.tex[i].unnormalized) {
            emit->texcoord_scale_index[i] = total_consts++;
         }

         /* Texture buffer sizes */
         if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) {
            emit->texture_buffer_size_index[i] = total_consts++;
         }
      }
   }

   if (total_consts > 0) {
      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, 0);  /* which const buffer slot */
      emit_dword(emit, total_consts);
      end_emit_instruction(emit);
   }

   /* Declare remaining constant buffers (UBOs) */
   for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) {
      if (emit->num_shader_consts[i] > 0) {
         begin_emit_instruction(emit);
         emit_dword(emit, opcode0.value);
         emit_dword(emit, operand0.value);
         emit_dword(emit, i);  /* which const buffer slot */
         emit_dword(emit, emit->num_shader_consts[i]);
         end_emit_instruction(emit);
      }
   }

   return TRUE;
}


/**
 * Emit declarations for samplers.
 */
static boolean
emit_sampler_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   for (i = 0; i < emit->num_samplers; i++) {
      VGPU10OpcodeToken0 opcode0;
      VGPU10OperandToken0 operand0;

      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER;
      opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT;

      operand0.value = 0;
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, i);
      end_emit_instruction(emit);
   }

   return TRUE;
}


/**
 * Translate TGSI_TEXTURE_x to VGAPU10_RESOURCE_DIMENSION_x.
 * \param is_array  whether the texture is really bound as an array
 *                  texture; array TGSI targets fall back to the
 *                  non-array dimension when this is false.
 */
static unsigned
tgsi_texture_to_resource_dimension(enum tgsi_texture_type target,
                                   boolean is_array)
{
   switch (target) {
   case TGSI_TEXTURE_BUFFER:
      return VGPU10_RESOURCE_DIMENSION_BUFFER;
   case TGSI_TEXTURE_1D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_3D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE3D;
   case TGSI_TEXTURE_CUBE:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   case TGSI_TEXTURE_SHADOW1D:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_1D_ARRAY:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE1D;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   case TGSI_TEXTURE_SHADOWCUBE:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE;
   case TGSI_TEXTURE_2D_MSAA:
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY
         : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS;
   case TGSI_TEXTURE_CUBE_ARRAY:
      return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY;
   default:
      assert(!"Unexpected resource type");
      return VGPU10_RESOURCE_DIMENSION_TEXTURE2D;
   }
}


/**
 * Given a tgsi_return_type, return true iff it is an integer type.
 */
static boolean
is_integer_type(enum tgsi_return_type type)
{
   switch (type) {
      case TGSI_RETURN_TYPE_SINT:
      case TGSI_RETURN_TYPE_UINT:
         return TRUE;
      case TGSI_RETURN_TYPE_FLOAT:
      case TGSI_RETURN_TYPE_UNORM:
      case TGSI_RETURN_TYPE_SNORM:
         return FALSE;
      case TGSI_RETURN_TYPE_COUNT:
      default:
         assert(!"is_integer_type: Unknown tgsi_return_type");
         return FALSE;
   }
}


/**
 * Emit declarations for resources.
 * XXX When we're sure that all TGSI shaders will be generated with
 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may
 * rework this code.
 */
static boolean
emit_resource_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned i;

   /* Emit one DCL_RESOURCE token sequence per sampler:
    * opcode token, operand token, resource index, return-type token.
    */
   for (i = 0; i < emit->num_samplers; i++) {
      VGPU10OpcodeToken0 opcode0;
      VGPU10OperandToken0 operand0;
      VGPU10ResourceReturnTypeToken return_type;
      VGPU10_RESOURCE_RETURN_TYPE rt;

      /* Opcode token: DCL_RESOURCE carries the texture dimension */
      opcode0.value = 0;
      opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;
      opcode0.resourceDimension =
         tgsi_texture_to_resource_dimension(emit->sampler_target[i],
                                            emit->key.tex[i].is_array);
      /* Operand token: resource register, one immediate 32-bit index */
      operand0.value = 0;
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
      operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
      operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

#if 1
      /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x by adding 1;
       * the static assertions prove the two enums stay in lock-step.
       */
      STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1);
      STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1);
      STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1);
      STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1);
      STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1);
      assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT);
      rt = emit->sampler_return_type[i] + 1;
#else
      /* Equivalent explicit mapping, kept for reference */
      switch (emit->sampler_return_type[i]) {
      case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break;
      case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break;
      case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break;
      case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break;
      case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break;
      case TGSI_RETURN_TYPE_COUNT:
      default:
         rt = VGPU10_RETURN_TYPE_FLOAT;
         assert(!"emit_resource_declarations: Unknown tgsi_return_type");
      }
#endif

      /* All four components use the same return type */
      return_type.value = 0;
      return_type.component0 = rt;
      return_type.component1 = rt;
      return_type.component2 = rt;
      return_type.component3 = rt;

      begin_emit_instruction(emit);
      emit_dword(emit, opcode0.value);
      emit_dword(emit, operand0.value);
      emit_dword(emit, i);
      emit_dword(emit, return_type.value);
      end_emit_instruction(emit);
   }

   return TRUE;
}

/**
 * Emit a VGPU10 instruction with one destination and one source operand.
 */
static void
emit_instruction_op1(struct svga_shader_emitter_v10 *emit,
                     unsigned opcode,
                     const struct tgsi_full_dst_register *dst,
                     const struct tgsi_full_src_register *src,
                     boolean saturate)
{
   begin_emit_instruction(emit);
   emit_opcode(emit, opcode, saturate);
   emit_dst_register(emit, dst);
   emit_src_register(emit, src);
   end_emit_instruction(emit);
}

/**
 * Emit a VGPU10 instruction with one destination and two source operands.
 */
static void
emit_instruction_op2(struct svga_shader_emitter_v10 *emit,
                     unsigned opcode,
                     const struct tgsi_full_dst_register *dst,
                     const struct tgsi_full_src_register *src1,
                     const struct tgsi_full_src_register *src2,
                     boolean saturate)
{
   begin_emit_instruction(emit);
   emit_opcode(emit, opcode, saturate);
   emit_dst_register(emit, dst);
   emit_src_register(emit, src1);
   emit_src_register(emit, src2);
   end_emit_instruction(emit);
}

/**
 * Emit a VGPU10 instruction with one destination and three source operands.
 */
static void
emit_instruction_op3(struct svga_shader_emitter_v10 *emit,
                     unsigned opcode,
                     const struct tgsi_full_dst_register *dst,
                     const struct tgsi_full_src_register *src1,
                     const struct tgsi_full_src_register *src2,
                     const struct tgsi_full_src_register *src3,
                     boolean saturate)
{
   begin_emit_instruction(emit);
   emit_opcode(emit, opcode, saturate);
   emit_dst_register(emit, dst);
   emit_src_register(emit, src1);
   emit_src_register(emit, src2);
   emit_src_register(emit, src3);
   end_emit_instruction(emit);
}

/**
 * Emit the actual clip distance
 * instructions to be used for clipping
 * by copying the clip distance from the temporary registers to the
 * CLIPDIST registers written with the enabled planes mask.
 * Also copy the clip distance from the temporary to the clip distance
 * shadow copy register which will be referenced by the input shader
 */
static void
emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit)
{
   struct tgsi_full_src_register tmp_clip_dist_src;
   struct tgsi_full_dst_register clip_dist_dst;

   unsigned i;
   unsigned clip_plane_enable = emit->key.clip_plane_enable;
   unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index;
   int num_written_clipdist = emit->info.num_written_clipdistance;

   assert(emit->clip_dist_out_index != INVALID_INDEX);
   assert(emit->clip_dist_tmp_index != INVALID_INDEX);

   /**
    * Temporarily reset the temporary clip dist register index so
    * that the copy to the real clip dist register will not
    * attempt to copy to the temporary register again
    */
   emit->clip_dist_tmp_index = INVALID_INDEX;

   /* At most two clip-distance registers, four distances each */
   for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) {

      tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i);

      /**
       * copy to the shadow copy for use by varying variable and
       * stream output. All clip distances
       * will be written regardless of the enabled clipping planes.
       */
      clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
                                   emit->clip_dist_so_index + i);

      /* MOV clip_dist_so, tmp_clip_dist */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
                           &tmp_clip_dist_src, FALSE);

      /**
       * copy those clip distances to enabled clipping planes
       * to CLIPDIST registers for clipping
       */
      if (clip_plane_enable & 0xf) {
         clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT,
                                      emit->clip_dist_out_index + i);
         clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf);

         /* MOV CLIPDIST, tmp_clip_dist */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst,
                              &tmp_clip_dist_src, FALSE);
      }
      /* four clip planes per clip register */
      clip_plane_enable >>= 4;
   }
   /**
    * set the temporary clip dist register index back to the
    * temporary index for the next vertex
    */
   emit->clip_dist_tmp_index = clip_dist_tmp_index;
}

/* Declare clip distance output registers for user-defined clip planes
 * or the TGSI_CLIPVERTEX output.
 */
static void
emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit)
{
   unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable);
   unsigned index = emit->num_outputs;
   unsigned plane_mask;

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);
   assert(num_clip_planes <= 8);

   /* Only needed when this translator generates the clip code itself */
   if (emit->clip_mode != CLIP_LEGACY &&
       emit->clip_mode != CLIP_VERTEX) {
      return;
   }

   if (num_clip_planes == 0)
      return;

   /* Declare one or two clip output registers.  The number of components
    * in the mask reflects the number of clip planes.  For example, if 5
    * clip planes are needed, we'll declare outputs similar to:
    * dcl_output_siv o2.xyzw, clip_distance
    * dcl_output_siv o3.x, clip_distance
    */
   emit->clip_dist_out_index = index; /* save the starting clip dist reg index */

   plane_mask = (1 << num_clip_planes) - 1;
   if (plane_mask & 0xf) {
      unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index,
                              VGPU10_NAME_CLIP_DISTANCE, cmask);
      emit->num_outputs++;
   }
   if (plane_mask & 0xf0) {
      unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
      emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1,
                              VGPU10_NAME_CLIP_DISTANCE, cmask);
      emit->num_outputs++;
   }
}


/**
 * Emit the instructions for writing to the clip distance registers
 * to handle legacy/automatic clip planes.
 * For each clip plane, the distance is the dot product of the vertex
 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients.
 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE
 * output registers already declared.
3275 */ 3276 static void 3277 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, 3278 unsigned vpos_tmp_index) 3279 { 3280 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 3281 3282 assert(emit->clip_mode == CLIP_LEGACY); 3283 assert(num_clip_planes <= 8); 3284 3285 assert(emit->unit == PIPE_SHADER_VERTEX || 3286 emit->unit == PIPE_SHADER_GEOMETRY); 3287 3288 for (i = 0; i < num_clip_planes; i++) { 3289 struct tgsi_full_dst_register dst; 3290 struct tgsi_full_src_register plane_src, vpos_src; 3291 unsigned reg_index = emit->clip_dist_out_index + i / 4; 3292 unsigned comp = i % 4; 3293 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 3294 3295 /* create dst, src regs */ 3296 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 3297 dst = writemask_dst(&dst, writemask); 3298 3299 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 3300 vpos_src = make_src_temp_reg(vpos_tmp_index); 3301 3302 /* DP4 clip_dist, plane, vpos */ 3303 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 3304 &plane_src, &vpos_src, FALSE); 3305 } 3306 } 3307 3308 3309 /** 3310 * Emit the instructions for computing the clip distance results from 3311 * the clip vertex temporary. 3312 * For each clip plane, the distance is the dot product of the clip vertex 3313 * position (found in a temp reg) and the clip plane coefficients. 
 */
static void
emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit)
{
   const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable);
   unsigned i;
   struct tgsi_full_dst_register dst;
   struct tgsi_full_src_register clipvert_src;
   const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index;

   assert(emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY);

   assert(emit->clip_mode == CLIP_VERTEX);

   clipvert_src = make_src_temp_reg(clip_vertex_tmp);

   for (i = 0; i < num_clip; i++) {
      struct tgsi_full_src_register plane_src;
      /* four clip distances are packed per output register */
      unsigned reg_index = emit->clip_dist_out_index + i / 4;
      unsigned comp = i % 4;
      unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp;

      /* create dst, src regs */
      dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index);
      dst = writemask_dst(&dst, writemask);

      plane_src = make_src_const_reg(emit->clip_plane_const[i]);

      /* DP4 clip_dist, plane, vpos */
      emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst,
                           &plane_src, &clipvert_src, FALSE);
   }

   /* copy temporary clip vertex register to the clip vertex register */

   assert(emit->clip_vertex_out_index != INVALID_INDEX);

   /**
    * Temporarily reset the temporary clip vertex register index so
    * that copy to the clip vertex register will not attempt
    * to copy to the temporary register again
    */
   emit->clip_vertex_tmp_index = INVALID_INDEX;

   /* MOV clip_vertex, clip_vertex_tmp */
   dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index);
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                        &dst, &clipvert_src, FALSE);

   /**
    * set the temporary clip vertex register index back to the
    * temporary index for the next vertex
    */
   emit->clip_vertex_tmp_index = clip_vertex_tmp;
}

/**
 * Emit code to convert RGBA to BGRA
 */
static void
emit_swap_r_b(struct svga_shader_emitter_v10 *emit,
              const struct tgsi_full_dst_register *dst,
              const struct tgsi_full_src_register *src)
{
   /* MOV dst, src.zyxw — swap the X (red) and Z (blue) channels */
   struct tgsi_full_src_register bgra_src =
      swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W);

   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
   emit_dst_register(emit, dst);
   emit_src_register(emit, &bgra_src);
   end_emit_instruction(emit);
}


/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */
static void
emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit,
                    const struct tgsi_full_dst_register *dst,
                    const struct tgsi_full_src_register *src)
{
   struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f);
   /* NOTE(review): the W components (3.0 and -1.66666) intentionally
    * differ from the XYZ values to handle the 2-bit alpha channel;
    * taken as-is — confirm against the 10_10_10_2 format math.
    */
   struct tgsi_full_src_register two =
      make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f);
   struct tgsi_full_src_register neg_two =
      make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f);

   unsigned val_tmp = get_temp_index(emit);
   struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp);
   struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp);

   unsigned bias_tmp = get_temp_index(emit);
   struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp);
   struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp);

   /* val = src * 2.0 */
   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &val_dst,
                        src, &two, FALSE);

   /* bias = src >= 0.5 (GE yields all-ones per component when true) */
   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst,
                        src, &half, FALSE);

   /* bias = bias & -2.0 (select the bias where the compare passed) */
   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst,
                        &bias_src, &neg_two, FALSE);

   /* dst = val + bias */
   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst,
                        &val_src, &bias_src, FALSE);

   free_temp_indexes(emit);
}


/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */
static void
emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_full_dst_register *dst,
                      const struct tgsi_full_src_register *src)
{
   /* Scale XYZ by 1023 (10-bit max) and W by 3 (2-bit max) */
   struct tgsi_full_src_register scale =
      make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f);

   /* dst = src * scale */
   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE);
}


/** Convert from R32_UINT to 10_10_10_2_sscaled */
static void
emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_full_dst_register *dst,
                      const struct tgsi_full_src_register *src)
{
   struct tgsi_full_src_register lshift =
      make_immediate_reg_int4(emit, 22, 12, 2, 0);
   struct tgsi_full_src_register rshift =
      make_immediate_reg_int4(emit, 22, 22, 22, 30);

   struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X);

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);

   /* Sign-extend each packed field via shift-left then arithmetic
    * shift-right:
    * r = (pixel << 22) >> 22;  # signed int in [-512, 511]
    * g = (pixel << 12) >> 22;  # signed int in [-512, 511]
    * b = (pixel <<  2) >> 22;  # signed int in [-512, 511]
    * a = (pixel <<  0) >> 30;  # signed int in [-2, 1]
    * dst = i_to_f(r,g,b,a);    # convert to float
    */
   emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst,
                        &src_xxxx, &lshift, FALSE);
   emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst,
                        &tmp_src, &rshift, FALSE);
   emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE);

   free_temp_indexes(emit);
}


/**
 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction.
3480 */ 3481 static boolean 3482 emit_arl_uarl(struct svga_shader_emitter_v10 *emit, 3483 const struct tgsi_full_instruction *inst) 3484 { 3485 unsigned index = inst->Dst[0].Register.Index; 3486 struct tgsi_full_dst_register dst; 3487 unsigned opcode; 3488 3489 assert(index < MAX_VGPU10_ADDR_REGS); 3490 dst = make_dst_temp_reg(emit->address_reg_index[index]); 3491 3492 /* ARL dst, s0 3493 * Translates into: 3494 * FTOI address_tmp, s0 3495 * 3496 * UARL dst, s0 3497 * Translates into: 3498 * MOV address_tmp, s0 3499 */ 3500 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) 3501 opcode = VGPU10_OPCODE_FTOI; 3502 else 3503 opcode = VGPU10_OPCODE_MOV; 3504 3505 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE); 3506 3507 return TRUE; 3508 } 3509 3510 3511 /** 3512 * Emit code for TGSI_OPCODE_CAL instruction. 3513 */ 3514 static boolean 3515 emit_cal(struct svga_shader_emitter_v10 *emit, 3516 const struct tgsi_full_instruction *inst) 3517 { 3518 unsigned label = inst->Label.Label; 3519 VGPU10OperandToken0 operand; 3520 operand.value = 0; 3521 operand.operandType = VGPU10_OPERAND_TYPE_LABEL; 3522 3523 begin_emit_instruction(emit); 3524 emit_dword(emit, operand.value); 3525 emit_dword(emit, label); 3526 end_emit_instruction(emit); 3527 3528 return TRUE; 3529 } 3530 3531 3532 /** 3533 * Emit code for TGSI_OPCODE_IABS instruction. 3534 */ 3535 static boolean 3536 emit_iabs(struct svga_shader_emitter_v10 *emit, 3537 const struct tgsi_full_instruction *inst) 3538 { 3539 /* dst.x = (src0.x < 0) ? -src0.x : src0.x 3540 * dst.y = (src0.y < 0) ? -src0.y : src0.y 3541 * dst.z = (src0.z < 0) ? -src0.z : src0.z 3542 * dst.w = (src0.w < 0) ? 
-src0.w : src0.w 3543 * 3544 * Translates into 3545 * IMAX dst, src, neg(src) 3546 */ 3547 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); 3548 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], 3549 &inst->Src[0], &neg_src, FALSE); 3550 3551 return TRUE; 3552 } 3553 3554 3555 /** 3556 * Emit code for TGSI_OPCODE_CMP instruction. 3557 */ 3558 static boolean 3559 emit_cmp(struct svga_shader_emitter_v10 *emit, 3560 const struct tgsi_full_instruction *inst) 3561 { 3562 /* dst.x = (src0.x < 0) ? src1.x : src2.x 3563 * dst.y = (src0.y < 0) ? src1.y : src2.y 3564 * dst.z = (src0.z < 0) ? src1.z : src2.z 3565 * dst.w = (src0.w < 0) ? src1.w : src2.w 3566 * 3567 * Translates into 3568 * LT tmp, src0, 0.0 3569 * MOVC dst, tmp, src1, src2 3570 */ 3571 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3572 unsigned tmp = get_temp_index(emit); 3573 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3574 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3575 3576 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, 3577 &inst->Src[0], &zero, FALSE); 3578 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], 3579 &tmp_src, &inst->Src[1], &inst->Src[2], 3580 inst->Instruction.Saturate); 3581 3582 free_temp_indexes(emit); 3583 3584 return TRUE; 3585 } 3586 3587 3588 /** 3589 * Emit code for TGSI_OPCODE_DST instruction. 
 */
static boolean
emit_dst(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = 1
    * dst.y = src0.y * src1.y
    * dst.z = src0.z
    * dst.w = src1.w
    */

   struct tgsi_full_src_register s0_yyyy =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s0_zzzz =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z);
   struct tgsi_full_src_register s1_yyyy =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y);
   struct tgsi_full_src_register s1_wwww =
      scalar_src(&inst->Src[1], TGSI_SWIZZLE_W);

   /*
    * In case dst aliases src0 or src1, build the result in a temp
    * register and copy it to the real destination with a final move.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /* MOV dst.x, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   }

   /* MUL dst.y, s0.y, s1.y */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);

      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy,
                           &s1_yyyy, inst->Instruction.Saturate);
   }

   /* MOV dst.z, s0.z */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz,
                           inst->Instruction.Saturate);
   }

   /* MOV dst.w, s1.w */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww,
                           inst->Instruction.Saturate);
   }

   /* copy the assembled temporary to the actual destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);
   free_temp_indexes(emit);

   return TRUE;
}



/**
 * Emit code for TGSI_OPCODE_ENDPRIM (GS only)
 */
static boolean
emit_endprim(struct svga_shader_emitter_v10 *emit,
             const struct tgsi_full_instruction *inst)
{
   assert(emit->unit == PIPE_SHADER_GEOMETRY);

   /* We can't use emit_simple() because the TGSI instruction has one
    * operand (vertex stream number) which we must ignore for VGPU10.
    */
   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE);
   end_emit_instruction(emit);
   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction.
 */
static boolean
emit_ex2(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_EX2 computes only one value from src.x
    * while VGPU10 computes four values.
    *
    * dst = EX2(src):
    *   dst.xyzw = 2.0 ^ src.x
    */

   struct tgsi_full_src_register src_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* EXP dst, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx,
                        inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_EXP instruction.
 */
static boolean
emit_exp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = 2 ^ floor(s0.x)
    * dst.y = s0.x - floor(s0.x)
    * dst.z = 2 ^ s0.x
    * dst.w = 1.0
    */

   struct tgsi_full_src_register src_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   /*
    * If dst and src are the same we need to create
    * a temporary for it and insert an extra move.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /* only use X component of temp reg */
   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* ROUND_NI tmp.x, s0.x */
   emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
                        &src_xxxx, FALSE); /* round to -infinity */

   /* EXP dst.x, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src,
                           inst->Instruction.Saturate);
   }

   /* ADD dst.y, s0.x, -tmp */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
      struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src);

      emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx,
                           &neg_tmp_src, inst->Instruction.Saturate);
   }

   /* EXP dst.z, s0.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx,
                           inst->Instruction.Saturate);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one,
                           FALSE);
   }

   /* copy the assembled temporary to the actual destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_IF instruction.
 */
static boolean
emit_if(struct svga_shader_emitter_v10 *emit,
        const struct tgsi_full_instruction *inst)
{
   VGPU10OpcodeToken0 opcode0;

   /* The src register should be a scalar */
   assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY &&
          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ &&
          inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW);

   /* The only special thing here is that we need to set the
    * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if
    * src.x is non-zero.
    */
   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_IF;
   opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   emit_src_register(emit, &inst->Src[0]);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of
 * the register components are negative).
3826 */ 3827 static boolean 3828 emit_kill_if(struct svga_shader_emitter_v10 *emit, 3829 const struct tgsi_full_instruction *inst) 3830 { 3831 unsigned tmp = get_temp_index(emit); 3832 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3833 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3834 3835 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3836 3837 struct tgsi_full_dst_register tmp_dst_x = 3838 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3839 struct tgsi_full_src_register tmp_src_xxxx = 3840 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3841 3842 /* tmp = src[0] < 0.0 */ 3843 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 3844 &zero, FALSE); 3845 3846 if (!same_swizzle_terms(&inst->Src[0])) { 3847 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to 3848 * logically OR the swizzle terms. Most uses of KILL_IF only 3849 * test one channel so it's good to avoid these extra steps. 3850 */ 3851 struct tgsi_full_src_register tmp_src_yyyy = 3852 scalar_src(&tmp_src, TGSI_SWIZZLE_Y); 3853 struct tgsi_full_src_register tmp_src_zzzz = 3854 scalar_src(&tmp_src, TGSI_SWIZZLE_Z); 3855 struct tgsi_full_src_register tmp_src_wwww = 3856 scalar_src(&tmp_src, TGSI_SWIZZLE_W); 3857 3858 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3859 &tmp_src_yyyy, FALSE); 3860 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3861 &tmp_src_zzzz, FALSE); 3862 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3863 &tmp_src_wwww, FALSE); 3864 } 3865 3866 begin_emit_instruction(emit); 3867 emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ 3868 emit_src_register(emit, &tmp_src_xxxx); 3869 end_emit_instruction(emit); 3870 3871 free_temp_indexes(emit); 3872 3873 return TRUE; 3874 } 3875 3876 3877 /** 3878 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). 
 */
static boolean
emit_kill(struct svga_shader_emitter_v10 *emit,
          const struct tgsi_full_instruction *inst)
{
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);

   /* DISCARD if 0.0 is zero — i.e. always discard */
   begin_emit_instruction(emit);
   emit_discard_opcode(emit, FALSE);
   emit_src_register(emit, &zero);
   end_emit_instruction(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LG2 instruction.
 */
static boolean
emit_lg2(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_LG2 computes only one value from src.x
    * while VGPU10 computes four values.
    *
    * dst = LG2(src):
    *   dst.xyzw = log2(src.x)
    */

   struct tgsi_full_src_register src_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* LOG dst, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx,
                        inst->Instruction.Saturate);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LIT instruction.
 */
static boolean
emit_lit(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /*
    * If dst and src are the same we need to create
    * a temporary for it and insert an extra move.
    */
   unsigned tmp_move = get_temp_index(emit);
   struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move);
   struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move);

   /*
    * dst.x = 1
    * dst.y = max(src.x, 0)
    * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0
    * dst.w = 1
    */

   /* MOV dst.x, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&move_dst, TGSI_WRITEMASK_X);
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&move_dst, TGSI_WRITEMASK_W);
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   }

   /* MAX dst.y, src.x, 0.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Y);
      struct tgsi_full_src_register zero =
         make_immediate_reg_float(emit, 0.0f);
      struct tgsi_full_src_register src_xxxx =
         swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                     TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx,
                           &zero, inst->Instruction.Saturate);
   }

   /*
    * tmp1 = clamp(src.w, -128, 128);
    *   MAX tmp1, src.w, -128
    *   MIN tmp1, tmp1, 128
    *
    * tmp2 = max(src.y, 0);
    *   MAX tmp2, src.y, 0
    *
    * tmp1 = pow(tmp2, tmp1);
    *   LOG tmp2, tmp2
    *   MUL tmp1, tmp2, tmp1
    *   EXP tmp1, tmp1
    *
    * tmp1 = (src.w == 0) ? 1 : tmp1;
    *   EQ tmp2, 0, src.w
    *   MOVC tmp1, tmp2, 1.0, tmp1
    *
    * dst.z = (0 < src.x) ? tmp1 : 0;
    *   LT tmp2, 0, src.x
    *   MOVC dst.z, tmp2, tmp1, 0.0
    */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&move_dst, TGSI_WRITEMASK_Z);

      unsigned tmp1 = get_temp_index(emit);
      struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
      struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);
      unsigned tmp2 = get_temp_index(emit);
      struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
      struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);

      struct tgsi_full_src_register src_xxxx =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
      struct tgsi_full_src_register src_yyyy =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y);
      struct tgsi_full_src_register src_wwww =
         scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);

      struct tgsi_full_src_register zero =
         make_immediate_reg_float(emit, 0.0f);
      struct tgsi_full_src_register lowerbound =
         make_immediate_reg_float(emit, -128.0f);
      struct tgsi_full_src_register upperbound =
         make_immediate_reg_float(emit, 128.0f);

      /* tmp1 = clamp(src.w, -128, 128) */
      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww,
                           &lowerbound, FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src,
                           &upperbound, FALSE);
      /* tmp2 = max(src.y, 0) */
      emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy,
                           &zero, FALSE);

      /* POW tmp1, tmp2, tmp1 */
      /* LOG tmp2, tmp2 */
      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src,
                           FALSE);

      /* MUL tmp1, tmp2, tmp1 */
      emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src,
                           &tmp1_src, FALSE);

      /* EXP tmp1, tmp1 */
      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src,
                           FALSE);

      /* EQ tmp2, 0, src.w */
      emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero,
                           &src_wwww, FALSE);
      /* MOVC tmp1, tmp2, 1.0, tmp1 */
      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst,
                           &tmp2_src, &one, &tmp1_src, FALSE);

      /* LT tmp2, 0, src.x */
      emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero,
                           &src_xxxx, FALSE);
      /* MOVC dst.z, tmp2, tmp1, 0.0 */
      emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z,
                           &tmp2_src, &tmp1_src, &zero, FALSE);
   }

   /* copy the assembled temporary to the actual destination */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src,
                        FALSE);
   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LOG instruction.
 */
static boolean
emit_log(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /*
    * dst.x = floor(lg2(abs(s0.x)))
    * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x))))
    * dst.z = lg2(abs(s0.x))
    * dst.w = 1.0
    */

   struct tgsi_full_src_register src_xxxx =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_X);
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx);

   /* only use X component of temp reg */
   tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* LOG tmp.x, abs(s0.x) */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) {
      emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst,
                           &abs_src_xxxx, FALSE);
   }

   /* MOV dst.z, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
      struct tgsi_full_dst_register dst_z =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z,
                           &tmp_src, inst->Instruction.Saturate);
   }

   /* FLR tmp.x, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
      emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst,
                           &tmp_src, FALSE);
   }

   /* MOV dst.x, tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
      struct tgsi_full_dst_register dst_x =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src,
                           inst->Instruction.Saturate);
   }

   /* EXP tmp.x, tmp.x */
   /* DIV dst.y, abs(s0.x), tmp.x */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
      struct tgsi_full_dst_register dst_y =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y);

      emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src,
                           FALSE);
      emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx,
                           &tmp_src, inst->Instruction.Saturate);
   }

   /* MOV dst.w, 1.0 */
   if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
      struct tgsi_full_dst_register dst_w =
         writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE);
   }

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_LRP instruction.
*/
static boolean
emit_lrp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = LRP(s0, s1, s2):
    *   dst = s0 * (s1 - s2) + s2
    * Translates into:
    *   ADD tmp, s1, -s2;      tmp = s1 - s2
    *   MAD dst, s0, tmp, s2;  dst = s0 * tmp + s2
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);

   /* ADD tmp, s1, -s2 */
   emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp,
                        &inst->Src[1], &neg_src2, FALSE);

   /* MAD dst, s0, tmp, s2 */
   emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
                        &inst->Src[0], &src_tmp, &inst->Src[2],
                        inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_POW instruction.
 */
static boolean
emit_pow(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
    * src1.x while VGPU10 computes four values.
    *
    * dst = POW(src0, src1):
    *   dst.xyzw = src0.x ^ src1.x
    *
    * Implemented as dst = exp2(src1.x * log2(src0.x)).
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register src0_xxxx =
      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
   struct tgsi_full_src_register src1_xxxx =
      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);

   /* LOG tmp, s0.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx,
                        FALSE);

   /* MUL tmp, tmp, s1.xxxx */
   emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src,
                        &src1_xxxx, FALSE);

   /* EXP dst, tmp */
   emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0],
                        &tmp_src, inst->Instruction.Saturate);

   /* free tmp */
   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
*/
static boolean
emit_rcp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst.xyzw = 1.0 / src.x, computed into tmp.x then smeared */
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* DIV tmp.x, 1.0, s0 */
   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one,
                        &inst->Src[0], FALSE);

   /* MOV dst, tmp.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &tmp_src_xxxx, inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_RSQ instruction.
 */
static boolean
emit_rsq(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = RSQ(src):
    *   dst.xyzw = 1 / sqrt(src.x)
    * Translates into:
    *   RSQ tmp, src.x
    *   MOV dst, tmp.xxxx
    */

   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);
   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);

   /* RSQ tmp, src.x */
   emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x,
                        &inst->Src[0], FALSE);

   /* MOV dst, tmp.xxxx */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &tmp_src_xxxx, inst->Instruction.Saturate);

   /* free tmp */
   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction.
 */
static boolean
emit_seq(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SEQ(s0, s1):
    *   dst = s0 == s1 ? 1.0 : 0.0  (per component)
    * Translates into:
    *   EQ tmp, s0, s1;           tmp = s0 == s1 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* EQ tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0],
                        &inst->Src[1], FALSE);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction.
 */
static boolean
emit_sge(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SGE(s0, s1):
    *   dst = s0 >= s1 ? 1.0 : 0.0  (per component)
    * Translates into:
    *   GE tmp, s0, s1;           tmp = s0 >= s1 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* GE tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0],
                        &inst->Src[1], FALSE);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction.
4359 */ 4360 static boolean 4361 emit_sgt(struct svga_shader_emitter_v10 *emit, 4362 const struct tgsi_full_instruction *inst) 4363 { 4364 /* dst = SGT(s0, s1): 4365 * dst = s0 > s1 ? 1.0 : 0.0 (per component) 4366 * Translates into: 4367 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) 4368 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4369 */ 4370 unsigned tmp = get_temp_index(emit); 4371 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4372 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4373 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4374 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4375 4376 /* LT tmp, s1, s0 */ 4377 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], 4378 &inst->Src[0], FALSE); 4379 4380 /* MOVC dst, tmp, one, zero */ 4381 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4382 &one, &zero, FALSE); 4383 4384 free_temp_indexes(emit); 4385 4386 return TRUE; 4387 } 4388 4389 4390 /** 4391 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. 
*/
static boolean
emit_sincos(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);

   struct tgsi_full_src_register tmp_src_xxxx =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst_x =
      writemask_dst(&tmp_dst, TGSI_WRITEMASK_X);

   /* VGPU10's SINCOS writes sine to its first destination register and
    * cosine to the second.  Write whichever one we need to tmp.x and
    * leave the other destination null.
    */
   begin_emit_instruction(emit);
   emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE);

   if(inst->Instruction.Opcode == TGSI_OPCODE_SIN)
   {
      emit_dst_register(emit, &tmp_dst_x);  /* first destination register */
      emit_null_dst_register(emit);  /* second destination register */
   }
   else {
      emit_null_dst_register(emit);
      emit_dst_register(emit, &tmp_dst_x);
   }

   emit_src_register(emit, &inst->Src[0]);
   end_emit_instruction(emit);

   /* MOV dst, tmp.xxxx -- smear the scalar result to all components */
   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &tmp_src_xxxx, inst->Instruction.Saturate);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction.
 */
static boolean
emit_sle(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SLE(s0, s1):
    *   dst = s0 <= s1 ? 1.0 : 0.0  (per component)
    * Translates into:
    *   GE tmp, s1, s0;           tmp = s1 >= s0 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* GE tmp, s1, s0 */
   emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1],
                        &inst->Src[0], FALSE);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction.
 */
static boolean
emit_slt(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   /* dst = SLT(s0, s1):
    *   dst = s0 < s1 ? 1.0 : 0.0  (per component)
    * Translates into:
    *   LT tmp, s0, s1;           tmp = s0 < s1 ? 0xffffffff : 0 (per comp)
    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
    */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);

   /* LT tmp, s0, s1 */
   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
                        &inst->Src[1], FALSE);

   /* MOVC dst, tmp, one, zero */
   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
                        &one, &zero, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
4499 */ 4500 static boolean 4501 emit_sne(struct svga_shader_emitter_v10 *emit, 4502 const struct tgsi_full_instruction *inst) 4503 { 4504 /* dst = SNE(s0, s1): 4505 * dst = s0 != s1 ? 1.0 : 0.0 (per component) 4506 * Translates into: 4507 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4508 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4509 */ 4510 unsigned tmp = get_temp_index(emit); 4511 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4512 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4513 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4514 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4515 4516 /* NE tmp, s0, s1 */ 4517 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], 4518 &inst->Src[1], FALSE); 4519 4520 /* MOVC dst, tmp, one, zero */ 4521 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4522 &one, &zero, FALSE); 4523 4524 free_temp_indexes(emit); 4525 4526 return TRUE; 4527 } 4528 4529 4530 /** 4531 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. 4532 */ 4533 static boolean 4534 emit_ssg(struct svga_shader_emitter_v10 *emit, 4535 const struct tgsi_full_instruction *inst) 4536 { 4537 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 4538 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 4539 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 4540 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 4541 * Translates into: 4542 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) 4543 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) 4544 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) 4545 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 
1.0 : tmp2 (per component) 4546 */ 4547 struct tgsi_full_src_register zero = 4548 make_immediate_reg_float(emit, 0.0f); 4549 struct tgsi_full_src_register one = 4550 make_immediate_reg_float(emit, 1.0f); 4551 struct tgsi_full_src_register neg_one = 4552 make_immediate_reg_float(emit, -1.0f); 4553 4554 unsigned tmp1 = get_temp_index(emit); 4555 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4556 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4557 4558 unsigned tmp2 = get_temp_index(emit); 4559 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4560 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4561 4562 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], 4563 &zero, FALSE); 4564 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, 4565 &neg_one, &zero, FALSE); 4566 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, 4567 &inst->Src[0], FALSE); 4568 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, 4569 &one, &tmp2_src, FALSE); 4570 4571 free_temp_indexes(emit); 4572 4573 return TRUE; 4574 } 4575 4576 4577 /** 4578 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 4579 */ 4580 static boolean 4581 emit_issg(struct svga_shader_emitter_v10 *emit, 4582 const struct tgsi_full_instruction *inst) 4583 { 4584 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 4585 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 4586 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 4587 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 4588 * Translates into: 4589 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) 4590 * ILT tmp2, 0, src tmp2 = 0 < src ? 
-1 : 0 (per component)
 *   IADD dst, tmp1, neg(tmp2)   dst = tmp1 - tmp2 (per component)
 */
   /* Note: ILT yields integer ~0 (== -1) or 0 per component, so the
    * subtraction directly produces the -1 / 0 / +1 sign value.
    */
   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);

   unsigned tmp1 = get_temp_index(emit);
   struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1);
   struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1);

   unsigned tmp2 = get_temp_index(emit);
   struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2);
   struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2);

   struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src);

   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst,
                        &inst->Src[0], &zero, FALSE);
   emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst,
                        &zero, &inst->Src[0], FALSE);
   emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0],
                        &tmp1_src, &neg_tmp2, FALSE);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit a comparison instruction.  The dest register will get
 * 0 or ~0 values depending on the outcome of comparing src0 to src1.
 */
static void
emit_comparison(struct svga_shader_emitter_v10 *emit,
                SVGA3dCmpFunc func,
                const struct tgsi_full_dst_register *dst,
                const struct tgsi_full_src_register *src0,
                const struct tgsi_full_src_register *src1)
{
   struct tgsi_full_src_register immediate;
   VGPU10OpcodeToken0 opcode0;
   boolean swapSrc = FALSE;

   /* Sanity checks for svga vs. gallium enums */
   STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1));
   STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1));

   opcode0.value = 0;

   switch (func) {
   case SVGA3D_CMP_NEVER:
      /* comparison is constant-false: MOV dst, {0} */
      immediate = make_immediate_reg_int(emit, 0);
      begin_emit_instruction(emit);
      emit_dword(emit, VGPU10_OPCODE_MOV);
      emit_dst_register(emit, dst);
      emit_src_register(emit, &immediate);
      end_emit_instruction(emit);
      return;
   case SVGA3D_CMP_ALWAYS:
      /* comparison is constant-true: MOV dst, {-1} */
      immediate = make_immediate_reg_int(emit, -1);
      begin_emit_instruction(emit);
      emit_dword(emit, VGPU10_OPCODE_MOV);
      emit_dst_register(emit, dst);
      emit_src_register(emit, &immediate);
      end_emit_instruction(emit);
      return;
   case SVGA3D_CMP_LESS:
      opcode0.opcodeType = VGPU10_OPCODE_LT;
      break;
   case SVGA3D_CMP_EQUAL:
      opcode0.opcodeType = VGPU10_OPCODE_EQ;
      break;
   case SVGA3D_CMP_LESSEQUAL:
      /* a <= b  is emitted as  b >= a */
      opcode0.opcodeType = VGPU10_OPCODE_GE;
      swapSrc = TRUE;
      break;
   case SVGA3D_CMP_GREATER:
      /* a > b  is emitted as  b < a */
      opcode0.opcodeType = VGPU10_OPCODE_LT;
      swapSrc = TRUE;
      break;
   case SVGA3D_CMP_NOTEQUAL:
      opcode0.opcodeType = VGPU10_OPCODE_NE;
      break;
   case SVGA3D_CMP_GREATEREQUAL:
      opcode0.opcodeType = VGPU10_OPCODE_GE;
      break;
   default:
      assert(!"Unexpected comparison mode");
      opcode0.opcodeType = VGPU10_OPCODE_EQ;
   }

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   emit_dst_register(emit, dst);
   if (swapSrc) {
      emit_src_register(emit, src1);
      emit_src_register(emit, src0);
   }
   else {
      emit_src_register(emit, src0);
      emit_src_register(emit, src1);
   }
   end_emit_instruction(emit);
}


/**
 * Get texel/address offsets for a texture instruction.
*/
static void
get_texel_offsets(const struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_instruction *inst, int offsets[3])
{
   if (inst->Texture.NumOffsets == 1) {
      /* According to OpenGL Shader Language spec the offsets are only
       * fetched from a previously-declared immediate/literal.
       */
      const struct tgsi_texture_offset *off = inst->TexOffsets;
      const unsigned index = off[0].Index;
      const unsigned swizzleX = off[0].SwizzleX;
      const unsigned swizzleY = off[0].SwizzleY;
      const unsigned swizzleZ = off[0].SwizzleZ;
      const union tgsi_immediate_data *imm = emit->immediates[index];

      assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE);

      offsets[0] = imm[swizzleX].Int;
      offsets[1] = imm[swizzleY].Int;
      offsets[2] = imm[swizzleZ].Int;
   }
   else {
      /* no offsets specified */
      offsets[0] = offsets[1] = offsets[2] = 0;
   }
}


/**
 * Set up the coordinate register for texture sampling.
 * When we're sampling from a RECT texture we have to scale the
 * unnormalized coordinate to a normalized coordinate.
 * We do that by multiplying the coordinate by an "extra" constant.
 * An alternative would be to use the RESINFO instruction to query the
 * texture's size.
 */
static struct tgsi_full_src_register
setup_texcoord(struct svga_shader_emitter_v10 *emit,
               unsigned unit,
               const struct tgsi_full_src_register *coord)
{
   if (emit->key.tex[unit].unnormalized) {
      unsigned scale_index = emit->texcoord_scale_index[unit];
      unsigned tmp = get_temp_index(emit);
      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
      struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);

      if (emit->key.tex[unit].texel_bias) {
         /* To fix a texture coordinate rounding issue, a 0.0001 offset
          * is added.  This fixes the piglit test fbo-blit-scaled-linear.
          */
         struct tgsi_full_src_register offset =
            make_immediate_reg_float(emit, 0.0001f);

         /* ADD tmp, coord, offset */
         emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
                              coord, &offset, FALSE);
         /* MUL tmp, tmp, scale */
         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
                              &tmp_src, &scale_src, FALSE);
      }
      else {
         /* MUL tmp, coord, const[] */
         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
                              coord, &scale_src, FALSE);
      }
      return tmp_src;
   }
   else {
      /* use texcoord as-is */
      return *coord;
   }
}


/**
 * For SAMPLE_C instructions, emit the extra src register which indicates
 * the reference/comparision value.
 */
static void
emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
                          enum tgsi_texture_type target,
                          const struct tgsi_full_src_register *coord)
{
   struct tgsi_full_src_register coord_src_ref;
   int component;

   assert(tgsi_is_shadow_target(target));

   /* which coordinate component holds the shadow reference value */
   component = tgsi_util_get_shadow_ref_src_index(target) % 4;
   assert(component >= 0);

   coord_src_ref = scalar_src(coord, component);

   emit_src_register(emit, &coord_src_ref);
}


/**
 * Info for implementing texture swizzles.
 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
 * functions use this to encapsulate the extra steps needed to perform
 * a texture swizzle, or shadow/depth comparisons.
 * The shadow/depth comparison is only done here if for the cases where
 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
 */
struct tex_swizzle_info
{
   boolean swizzled;                       /* any non-identity swizzle term? */
   boolean shadow_compare;                 /* emit manual shadow comparison? */
   unsigned unit;                          /* texture unit (set only when swizzled/compare) */
   enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
   struct tgsi_full_src_register tmp_src;  /* temp reg holding the raw texel */
   struct tgsi_full_dst_register tmp_dst;
   const struct tgsi_full_dst_register *inst_dst;  /* original instruction dest */
   const struct tgsi_full_src_register *coord_src; /* original coordinate src */
};


/**
 * Do setup for handling texture swizzles or shadow compares.
 * \param unit  the texture unit
 * \param inst  the TGSI texture instruction
 * \param shadow_compare  do shadow/depth comparison?
 * \param swz  returns the swizzle info
 */
static void
begin_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                  unsigned unit,
                  const struct tgsi_full_instruction *inst,
                  boolean shadow_compare,
                  struct tex_swizzle_info *swz)
{
   swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X ||
                    emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y ||
                    emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z ||
                    emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W);

   swz->shadow_compare = shadow_compare;
   swz->texture_target = inst->Texture.Texture;

   if (swz->swizzled || shadow_compare) {
      /* Allocate temp register for the result of the SAMPLE instruction
       * and the source of the MOV/compare/swizzle instructions.
       */
      unsigned tmp = get_temp_index(emit);
      swz->tmp_src = make_src_temp_reg(tmp);
      swz->tmp_dst = make_dst_temp_reg(tmp);

      swz->unit = unit;
   }
   swz->inst_dst = &inst->Dst[0];
   swz->coord_src = &inst->Src[0];

   /* record which units do shadow comparison */
   emit->fs.shadow_compare_units |= shadow_compare << unit;
}


/**
 * Returns the register to put the SAMPLE instruction results into.
 * This will either be the original instruction dst reg (if no swizzle
 * and no shadow comparison) or a temporary reg if there is a swizzle.
*/
static const struct tgsi_full_dst_register *
get_tex_swizzle_dst(const struct tex_swizzle_info *swz)
{
   return (swz->swizzled || swz->shadow_compare)
      ? &swz->tmp_dst : swz->inst_dst;
}


/**
 * This emits the MOV instruction that actually implements a texture swizzle
 * and/or shadow comparison.
 */
static void
end_tex_swizzle(struct svga_shader_emitter_v10 *emit,
                const struct tex_swizzle_info *swz)
{
   if (swz->shadow_compare) {
      /* Emit extra instructions to compare the fetched texel value against
       * a texture coordinate component.  The result of the comparison
       * is 0.0 or 1.0.
       */
      struct tgsi_full_src_register coord_src;
      struct tgsi_full_src_register texel_src =
         scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X);
      struct tgsi_full_src_register one =
         make_immediate_reg_float(emit, 1.0f);
      /* convert gallium comparison func to SVGA comparison func */
      SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1;

      assert(emit->unit == PIPE_SHADER_FRAGMENT);

      int component =
         tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4;
      assert(component >= 0);
      coord_src = scalar_src(swz->coord_src, component);

      /* COMPARE tmp, coord, texel -- tmp gets 0 or ~0 per component */
      emit_comparison(emit, compare_func,
                      &swz->tmp_dst, &coord_src, &texel_src);

      /* AND dest, tmp, {1.0} -- ANDing the ~0/0 mask with 1.0f's bit
       * pattern converts it to float 1.0/0.0.
       */
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_AND, FALSE);
      if (swz->swizzled) {
         emit_dst_register(emit, &swz->tmp_dst);
      }
      else {
         emit_dst_register(emit, swz->inst_dst);
      }
      emit_src_register(emit, &swz->tmp_src);
      emit_src_register(emit, &one);
      end_emit_instruction(emit);
   }

   if (swz->swizzled) {
      unsigned swz_r = emit->key.tex[swz->unit].swizzle_r;
      unsigned swz_g = emit->key.tex[swz->unit].swizzle_g;
      unsigned swz_b = emit->key.tex[swz->unit].swizzle_b;
      unsigned swz_a = emit->key.tex[swz->unit].swizzle_a;
      unsigned writemask_0 = 0, writemask_1 = 0;
      boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]);

      /* Swizzle w/out zero/one terms */
      struct tgsi_full_src_register src_swizzled =
         swizzle_src(&swz->tmp_src,
                     swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X,
                     swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y,
                     swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z,
                     swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W);

      /* MOV dst, color(tmp).<swizzle> */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                           swz->inst_dst, &src_swizzled, FALSE);

      /* handle swizzle zero terms */
      writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) |
                     ((swz_g == PIPE_SWIZZLE_0) << 1) |
                     ((swz_b == PIPE_SWIZZLE_0) << 2) |
                     ((swz_a == PIPE_SWIZZLE_0) << 3));
      writemask_0 &= swz->inst_dst->Register.WriteMask;

      if (writemask_0) {
         /* integer textures need integer 0, float textures float 0.0 */
         struct tgsi_full_src_register zero = int_tex ?
            make_immediate_reg_int(emit, 0) :
            make_immediate_reg_float(emit, 0.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_0);

         /* MOV dst.writemask_0, {0,0,0,0} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                              &dst, &zero, FALSE);
      }

      /* handle swizzle one terms */
      writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) |
                     ((swz_g == PIPE_SWIZZLE_1) << 1) |
                     ((swz_b == PIPE_SWIZZLE_1) << 2) |
                     ((swz_a == PIPE_SWIZZLE_1) << 3));
      writemask_1 &= swz->inst_dst->Register.WriteMask;

      if (writemask_1) {
         /* integer textures need integer 1, float textures float 1.0 */
         struct tgsi_full_src_register one = int_tex ?
            make_immediate_reg_int(emit, 1) :
            make_immediate_reg_float(emit, 1.0f);
         struct tgsi_full_dst_register dst =
            writemask_dst(swz->inst_dst, writemask_1);

         /* MOV dst.writemask_1, {1,1,1,1} */
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE);
      }
   }
}


/**
 * Emit code for TGSI_OPCODE_SAMPLE instruction.
 */
static boolean
emit_sample(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst)
{
   const unsigned resource_unit = inst->Src[1].Register.Index;
   const unsigned sampler_unit = inst->Src[2].Register.Index;
   struct tgsi_full_src_register coord;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, resource_unit, &inst->Src[0]);

   /* SAMPLE dst, coord(s0), resource, sampler */
   begin_emit_instruction(emit);

   /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L
    * with LOD=0.  But our virtual GPU accepts this as-is.
    */
   emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE,
                      inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, resource_unit);
   emit_sampler_register(emit, sampler_unit);
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Check if a texture instruction is valid.
 * An example of an invalid texture instruction is doing shadow comparison
 * with an integer-valued texture.
 * If we detect an invalid texture instruction, we replace it with:
 *   MOV dst, {1,1,1,1};
 * \return TRUE if valid, FALSE if invalid.
*/
static boolean
is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit,
                         const struct tgsi_full_instruction *inst)
{
   const unsigned unit = inst->Src[1].Register.Index;
   const enum tgsi_texture_type target = inst->Texture.Texture;
   boolean valid = TRUE;

   if (tgsi_is_shadow_target(target) &&
       is_integer_type(emit->sampler_return_type[unit])) {
      debug_printf("Invalid SAMPLE_C with an integer texture!\n");
      valid = FALSE;
   }
   /* XXX might check for other conditions in the future here */

   if (!valid) {
      /* emit a MOV dst, {1,1,1,1} instruction. */
      struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
      begin_emit_instruction(emit);
      emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
      emit_dst_register(emit, &inst->Dst[0]);
      emit_src_register(emit, &one);
      end_emit_instruction(emit);
   }

   return valid;
}


/**
 * Emit code for TGSI_OPCODE_TEX (simple texture lookup)
 */
static boolean
emit_tex(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   const enum tgsi_texture_type target = inst->Texture.Texture;
   unsigned opcode;
   struct tgsi_full_src_register coord;
   int offsets[3];
   struct tex_swizzle_info swz_info;

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return TRUE;

   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* SAMPLE dst, coord(s0), resource, sampler */
   begin_emit_instruction(emit);

   /* shadow targets need the comparing SAMPLE_C variant */
   if (tgsi_is_shadow_target(target))
      opcode = VGPU10_OPCODE_SAMPLE_C;
   else
      opcode = VGPU10_OPCODE_SAMPLE;

   emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets);
   emit_dst_register(emit, get_tex_swizzle_dst(&swz_info));
   emit_src_register(emit, &coord);
   emit_resource_register(emit, unit);
   emit_sampler_register(emit, unit);
   if (opcode == VGPU10_OPCODE_SAMPLE_C) {
      emit_tex_compare_refcoord(emit, target, &coord);
   }
   end_emit_instruction(emit);

   end_tex_swizzle(emit, &swz_info);

   free_temp_indexes(emit);

   return TRUE;
}


/**
 * Emit code for TGSI_OPCODE_TXP (projective texture)
 */
static boolean
emit_txp(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const uint unit = inst->Src[1].Register.Index;
   const enum tgsi_texture_type target = inst->Texture.Texture;
   unsigned opcode;
   int offsets[3];
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register src0_wwww =
      scalar_src(&inst->Src[0], TGSI_SWIZZLE_W);
   struct tgsi_full_src_register coord;
   struct tex_swizzle_info swz_info;

   /* check that the sampler returns a float */
   if (!is_valid_tex_instruction(emit, inst))
      return TRUE;

   begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info);

   get_texel_offsets(emit, inst, offsets);

   coord = setup_texcoord(emit, unit, &inst->Src[0]);

   /* DIV tmp, coord, coord.wwww -- the projective divide */
   emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst,
                        &coord, &src0_wwww, FALSE);

   /* SAMPLE dst, coord(tmp), resource, sampler */
   begin_emit_instruction(emit);

   if (tgsi_is_shadow_target(target))
      /* NOTE: for non-fragment shaders, we should use
       * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is.
5144 */ 5145 opcode = VGPU10_OPCODE_SAMPLE_C; 5146 else 5147 opcode = VGPU10_OPCODE_SAMPLE; 5148 5149 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5150 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5151 emit_src_register(emit, &tmp_src); /* projected coord */ 5152 emit_resource_register(emit, unit); 5153 emit_sampler_register(emit, unit); 5154 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 5155 emit_tex_compare_refcoord(emit, target, &tmp_src); 5156 } 5157 end_emit_instruction(emit); 5158 5159 end_tex_swizzle(emit, &swz_info); 5160 5161 free_temp_indexes(emit); 5162 5163 return TRUE; 5164 } 5165 5166 5167 /** 5168 * Emit code for TGSI_OPCODE_TXD (explicit derivatives) 5169 */ 5170 static boolean 5171 emit_txd(struct svga_shader_emitter_v10 *emit, 5172 const struct tgsi_full_instruction *inst) 5173 { 5174 const uint unit = inst->Src[3].Register.Index; 5175 const enum tgsi_texture_type target = inst->Texture.Texture; 5176 int offsets[3]; 5177 struct tgsi_full_src_register coord; 5178 struct tex_swizzle_info swz_info; 5179 5180 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 5181 &swz_info); 5182 5183 get_texel_offsets(emit, inst, offsets); 5184 5185 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5186 5187 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ 5188 begin_emit_instruction(emit); 5189 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, 5190 inst->Instruction.Saturate, offsets); 5191 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5192 emit_src_register(emit, &coord); 5193 emit_resource_register(emit, unit); 5194 emit_sampler_register(emit, unit); 5195 emit_src_register(emit, &inst->Src[1]); /* Xderiv */ 5196 emit_src_register(emit, &inst->Src[2]); /* Yderiv */ 5197 end_emit_instruction(emit); 5198 5199 end_tex_swizzle(emit, &swz_info); 5200 5201 free_temp_indexes(emit); 5202 5203 return TRUE; 5204 } 5205 5206 5207 /** 5208 * Emit code for TGSI_OPCODE_TXF (texel fetch) 5209 */ 
5210 static boolean 5211 emit_txf(struct svga_shader_emitter_v10 *emit, 5212 const struct tgsi_full_instruction *inst) 5213 { 5214 const uint unit = inst->Src[1].Register.Index; 5215 const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture); 5216 int offsets[3]; 5217 struct tex_swizzle_info swz_info; 5218 5219 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5220 5221 get_texel_offsets(emit, inst, offsets); 5222 5223 if (msaa) { 5224 /* Fetch one sample from an MSAA texture */ 5225 struct tgsi_full_src_register sampleIndex = 5226 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5227 /* LD_MS dst, coord(s0), resource, sampleIndex */ 5228 begin_emit_instruction(emit); 5229 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, 5230 inst->Instruction.Saturate, offsets); 5231 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5232 emit_src_register(emit, &inst->Src[0]); 5233 emit_resource_register(emit, unit); 5234 emit_src_register(emit, &sampleIndex); 5235 end_emit_instruction(emit); 5236 } 5237 else { 5238 /* Fetch one texel specified by integer coordinate */ 5239 /* LD dst, coord(s0), resource */ 5240 begin_emit_instruction(emit); 5241 emit_sample_opcode(emit, VGPU10_OPCODE_LD, 5242 inst->Instruction.Saturate, offsets); 5243 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5244 emit_src_register(emit, &inst->Src[0]); 5245 emit_resource_register(emit, unit); 5246 end_emit_instruction(emit); 5247 } 5248 5249 end_tex_swizzle(emit, &swz_info); 5250 5251 free_temp_indexes(emit); 5252 5253 return TRUE; 5254 } 5255 5256 5257 /** 5258 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) 5259 * or TGSI_OPCODE_TXB2 (for cube shadow maps). 
5260 */ 5261 static boolean 5262 emit_txl_txb(struct svga_shader_emitter_v10 *emit, 5263 const struct tgsi_full_instruction *inst) 5264 { 5265 const enum tgsi_texture_type target = inst->Texture.Texture; 5266 unsigned opcode, unit; 5267 int offsets[3]; 5268 struct tgsi_full_src_register coord, lod_bias; 5269 struct tex_swizzle_info swz_info; 5270 5271 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || 5272 inst->Instruction.Opcode == TGSI_OPCODE_TXB || 5273 inst->Instruction.Opcode == TGSI_OPCODE_TXB2); 5274 5275 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { 5276 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 5277 unit = inst->Src[2].Register.Index; 5278 } 5279 else { 5280 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5281 unit = inst->Src[1].Register.Index; 5282 } 5283 5284 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 5285 &swz_info); 5286 5287 get_texel_offsets(emit, inst, offsets); 5288 5289 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5290 5291 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ 5292 begin_emit_instruction(emit); 5293 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { 5294 opcode = VGPU10_OPCODE_SAMPLE_L; 5295 } 5296 else { 5297 opcode = VGPU10_OPCODE_SAMPLE_B; 5298 } 5299 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5300 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5301 emit_src_register(emit, &coord); 5302 emit_resource_register(emit, unit); 5303 emit_sampler_register(emit, unit); 5304 emit_src_register(emit, &lod_bias); 5305 end_emit_instruction(emit); 5306 5307 end_tex_swizzle(emit, &swz_info); 5308 5309 free_temp_indexes(emit); 5310 5311 return TRUE; 5312 } 5313 5314 5315 /** 5316 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. 
5317 */ 5318 static boolean 5319 emit_txq(struct svga_shader_emitter_v10 *emit, 5320 const struct tgsi_full_instruction *inst) 5321 { 5322 const uint unit = inst->Src[1].Register.Index; 5323 5324 if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) { 5325 /* RESINFO does not support querying texture buffers, so we instead 5326 * store texture buffer sizes in shader constants, then copy them to 5327 * implement TXQ instead of emitting RESINFO. 5328 * MOV dst, const[texture_buffer_size_index[unit]] 5329 */ 5330 struct tgsi_full_src_register size_src = 5331 make_src_const_reg(emit->texture_buffer_size_index[unit]); 5332 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, 5333 FALSE); 5334 } else { 5335 /* RESINFO dst, srcMipLevel, resource */ 5336 begin_emit_instruction(emit); 5337 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 5338 emit_dst_register(emit, &inst->Dst[0]); 5339 emit_src_register(emit, &inst->Src[0]); 5340 emit_resource_register(emit, unit); 5341 end_emit_instruction(emit); 5342 } 5343 5344 free_temp_indexes(emit); 5345 5346 return TRUE; 5347 } 5348 5349 5350 /** 5351 * Emit a simple instruction (like ADD, MUL, MIN, etc). 5352 */ 5353 static boolean 5354 emit_simple(struct svga_shader_emitter_v10 *emit, 5355 const struct tgsi_full_instruction *inst) 5356 { 5357 const unsigned opcode = inst->Instruction.Opcode; 5358 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5359 unsigned i; 5360 5361 begin_emit_instruction(emit); 5362 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5363 inst->Instruction.Saturate); 5364 for (i = 0; i < op->num_dst; i++) { 5365 emit_dst_register(emit, &inst->Dst[i]); 5366 } 5367 for (i = 0; i < op->num_src; i++) { 5368 emit_src_register(emit, &inst->Src[i]); 5369 } 5370 end_emit_instruction(emit); 5371 5372 return TRUE; 5373 } 5374 5375 5376 /** 5377 * We only special case the MOV instruction to try to detect constant 5378 * color writes in the fragment shader. 
5379 */ 5380 static boolean 5381 emit_mov(struct svga_shader_emitter_v10 *emit, 5382 const struct tgsi_full_instruction *inst) 5383 { 5384 const struct tgsi_full_src_register *src = &inst->Src[0]; 5385 const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 5386 5387 if (emit->unit == PIPE_SHADER_FRAGMENT && 5388 dst->Register.File == TGSI_FILE_OUTPUT && 5389 dst->Register.Index == 0 && 5390 src->Register.File == TGSI_FILE_CONSTANT && 5391 !src->Register.Indirect) { 5392 emit->constant_color_output = TRUE; 5393 } 5394 5395 return emit_simple(emit, inst); 5396 } 5397 5398 5399 /** 5400 * Emit a simple VGPU10 instruction which writes to multiple dest registers, 5401 * where TGSI only uses one dest register. 5402 */ 5403 static boolean 5404 emit_simple_1dst(struct svga_shader_emitter_v10 *emit, 5405 const struct tgsi_full_instruction *inst, 5406 unsigned dst_count, 5407 unsigned dst_index) 5408 { 5409 const unsigned opcode = inst->Instruction.Opcode; 5410 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5411 unsigned i; 5412 5413 begin_emit_instruction(emit); 5414 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5415 inst->Instruction.Saturate); 5416 5417 for (i = 0; i < dst_count; i++) { 5418 if (i == dst_index) { 5419 emit_dst_register(emit, &inst->Dst[0]); 5420 } else { 5421 emit_null_dst_register(emit); 5422 } 5423 } 5424 5425 for (i = 0; i < op->num_src; i++) { 5426 emit_src_register(emit, &inst->Src[i]); 5427 } 5428 end_emit_instruction(emit); 5429 5430 return TRUE; 5431 } 5432 5433 5434 /** 5435 * Translate a single TGSI instruction to VGPU10. 
 */
static boolean
emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit,
                        unsigned inst_number,
                        const struct tgsi_full_instruction *inst)
{
   const unsigned opcode = inst->Instruction.Opcode;

   /* NOTE: inst_number is not referenced in this function body. */
   switch (opcode) {
   case TGSI_OPCODE_ADD:
   case TGSI_OPCODE_AND:
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_BRK:
   case TGSI_OPCODE_CEIL:
   case TGSI_OPCODE_CONT:
   case TGSI_OPCODE_DDX:
   case TGSI_OPCODE_DDY:
   case TGSI_OPCODE_DIV:
   case TGSI_OPCODE_DP2:
   case TGSI_OPCODE_DP3:
   case TGSI_OPCODE_DP4:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDIF:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_ENDSUB:
   case TGSI_OPCODE_F2I:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_FLR:
   case TGSI_OPCODE_FRC:
   case TGSI_OPCODE_FSEQ:
   case TGSI_OPCODE_FSGE:
   case TGSI_OPCODE_FSLT:
   case TGSI_OPCODE_FSNE:
   case TGSI_OPCODE_I2F:
   case TGSI_OPCODE_IMAX:
   case TGSI_OPCODE_IMIN:
   case TGSI_OPCODE_INEG:
   case TGSI_OPCODE_ISGE:
   case TGSI_OPCODE_ISHR:
   case TGSI_OPCODE_ISLT:
   case TGSI_OPCODE_MAD:
   case TGSI_OPCODE_MAX:
   case TGSI_OPCODE_MIN:
   case TGSI_OPCODE_MUL:
   case TGSI_OPCODE_NOP:
   case TGSI_OPCODE_NOT:
   case TGSI_OPCODE_OR:
   case TGSI_OPCODE_RET:
   case TGSI_OPCODE_UADD:
   case TGSI_OPCODE_USEQ:
   case TGSI_OPCODE_USGE:
   case TGSI_OPCODE_USLT:
   case TGSI_OPCODE_UMIN:
   case TGSI_OPCODE_UMAD:
   case TGSI_OPCODE_UMAX:
   case TGSI_OPCODE_ROUND:
   case TGSI_OPCODE_SQRT:
   case TGSI_OPCODE_SHL:
   case TGSI_OPCODE_TRUNC:
   case TGSI_OPCODE_U2F:
   case TGSI_OPCODE_UCMP:
   case TGSI_OPCODE_USHR:
   case TGSI_OPCODE_USNE:
   case TGSI_OPCODE_XOR:
      /* simple instructions: one-to-one opcode translation */
      return emit_simple(emit, inst);

   case TGSI_OPCODE_MOV:
      return emit_mov(emit, inst);
   case TGSI_OPCODE_EMIT:
      return emit_vertex(emit, inst);
   case TGSI_OPCODE_ENDPRIM:
      return emit_endprim(emit, inst);
   case TGSI_OPCODE_IABS:
      return emit_iabs(emit, inst);
   case TGSI_OPCODE_ARL:
      /* fall-through */
   case TGSI_OPCODE_UARL:
      return emit_arl_uarl(emit, inst);
   case TGSI_OPCODE_BGNSUB:
      /* no-op */
      return TRUE;
   case TGSI_OPCODE_CAL:
      return emit_cal(emit, inst);
   case TGSI_OPCODE_CMP:
      return emit_cmp(emit, inst);
   case TGSI_OPCODE_COS:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_DST:
      return emit_dst(emit, inst);
   case TGSI_OPCODE_EX2:
      return emit_ex2(emit, inst);
   case TGSI_OPCODE_EXP:
      return emit_exp(emit, inst);
   case TGSI_OPCODE_IF:
      return emit_if(emit, inst);
   case TGSI_OPCODE_KILL:
      return emit_kill(emit, inst);
   case TGSI_OPCODE_KILL_IF:
      return emit_kill_if(emit, inst);
   case TGSI_OPCODE_LG2:
      return emit_lg2(emit, inst);
   case TGSI_OPCODE_LIT:
      return emit_lit(emit, inst);
   case TGSI_OPCODE_LOG:
      return emit_log(emit, inst);
   case TGSI_OPCODE_LRP:
      return emit_lrp(emit, inst);
   case TGSI_OPCODE_POW:
      return emit_pow(emit, inst);
   case TGSI_OPCODE_RCP:
      return emit_rcp(emit, inst);
   case TGSI_OPCODE_RSQ:
      return emit_rsq(emit, inst);
   case TGSI_OPCODE_SAMPLE:
      return emit_sample(emit, inst);
   case TGSI_OPCODE_SEQ:
      return emit_seq(emit, inst);
   case TGSI_OPCODE_SGE:
      return emit_sge(emit, inst);
   case TGSI_OPCODE_SGT:
      return emit_sgt(emit, inst);
   case TGSI_OPCODE_SIN:
      return emit_sincos(emit, inst);
   case TGSI_OPCODE_SLE:
      return emit_sle(emit, inst);
   case TGSI_OPCODE_SLT:
      return emit_slt(emit, inst);
   case TGSI_OPCODE_SNE:
      return emit_sne(emit, inst);
   case TGSI_OPCODE_SSG:
      return emit_ssg(emit, inst);
   case TGSI_OPCODE_ISSG:
      return emit_issg(emit, inst);
   case TGSI_OPCODE_TEX:
      return emit_tex(emit, inst);
   case TGSI_OPCODE_TXP:
      return emit_txp(emit, inst);
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL:
      return emit_txl_txb(emit, inst);
   case TGSI_OPCODE_TXD:
      return emit_txd(emit, inst);
   case TGSI_OPCODE_TXF:
      return emit_txf(emit, inst);
   case TGSI_OPCODE_TXQ:
      return emit_txq(emit, inst);
   case TGSI_OPCODE_UIF:
      return emit_if(emit, inst);
   case TGSI_OPCODE_UMUL_HI:
   case TGSI_OPCODE_IMUL_HI:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_IDIV:
      /* These cases use only the FIRST of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 0);
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      /* These cases use only the SECOND of two destination registers */
      return emit_simple_1dst(emit, inst, 2, 1);
   case TGSI_OPCODE_END:
      /* Emit the epilogue helpers before translating the END itself. */
      if (!emit_post_helpers(emit))
         return FALSE;
      return emit_simple(emit, inst);

   default:
      debug_printf("Unimplemented tgsi instruction %s\n",
                   tgsi_get_opcode_name(opcode));
      return FALSE;
   }

   /* Not reached: every case above returns. */
   return TRUE;
}


/**
 * Emit the extra instructions to adjust the vertex position.
 * There are two possible adjustments:
 * 1. Converting from Gallium to VGPU10 coordinate space by applying the
 *    "prescale" and "pretranslate" values.
 * 2. Undoing the viewport transformation when we use the swtnl/draw path.
 * \param vs_pos_tmp_index  which temporary register contains the vertex pos.
5619 */ 5620 static void 5621 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, 5622 unsigned vs_pos_tmp_index) 5623 { 5624 struct tgsi_full_src_register tmp_pos_src; 5625 struct tgsi_full_dst_register pos_dst; 5626 5627 /* Don't bother to emit any extra vertex instructions if vertex position is 5628 * not written out 5629 */ 5630 if (emit->vposition.out_index == INVALID_INDEX) 5631 return; 5632 5633 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); 5634 pos_dst = make_dst_output_reg(emit->vposition.out_index); 5635 5636 /* If non-adjusted vertex position register index 5637 * is valid, copy the vertex position from the temporary 5638 * vertex position register before it is modified by the 5639 * prescale computation. 5640 */ 5641 if (emit->vposition.so_index != INVALID_INDEX) { 5642 struct tgsi_full_dst_register pos_so_dst = 5643 make_dst_output_reg(emit->vposition.so_index); 5644 5645 /* MOV pos_so, tmp_pos */ 5646 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, 5647 &tmp_pos_src, FALSE); 5648 } 5649 5650 if (emit->vposition.need_prescale) { 5651 /* This code adjusts the vertex position to match the VGPU10 convention. 
5652 * If p is the position computed by the shader (usually by applying the 5653 * modelview and projection matrices), the new position q is computed by: 5654 * 5655 * q.x = p.w * trans.x + p.x * scale.x 5656 * q.y = p.w * trans.y + p.y * scale.y 5657 * q.z = p.w * trans.z + p.z * scale.z; 5658 * q.w = p.w * trans.w + p.w; 5659 */ 5660 struct tgsi_full_src_register tmp_pos_src_w = 5661 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5662 struct tgsi_full_dst_register tmp_pos_dst = 5663 make_dst_temp_reg(vs_pos_tmp_index); 5664 struct tgsi_full_dst_register tmp_pos_dst_xyz = 5665 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); 5666 5667 struct tgsi_full_src_register prescale_scale = 5668 make_src_const_reg(emit->vposition.prescale_scale_index); 5669 struct tgsi_full_src_register prescale_trans = 5670 make_src_const_reg(emit->vposition.prescale_trans_index); 5671 5672 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ 5673 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, 5674 &tmp_pos_src, &prescale_scale, FALSE); 5675 5676 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ 5677 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, 5678 &prescale_trans, &tmp_pos_src, FALSE); 5679 } 5680 else if (emit->key.vs.undo_viewport) { 5681 /* This code computes the final vertex position from the temporary 5682 * vertex position by undoing the viewport transformation and the 5683 * divide-by-W operation (we convert window coords back to clip coords). 5684 * This is needed when we use the 'draw' module for fallbacks. 
5685 * If p is the temp pos in window coords, then the NDC coord q is: 5686 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w 5687 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w 5688 * q.z = p.z * p.w 5689 * q.w = p.w 5690 * CONST[vs_viewport_index] contains: 5691 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } 5692 */ 5693 struct tgsi_full_dst_register tmp_pos_dst = 5694 make_dst_temp_reg(vs_pos_tmp_index); 5695 struct tgsi_full_dst_register tmp_pos_dst_xy = 5696 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); 5697 struct tgsi_full_src_register tmp_pos_src_wwww = 5698 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5699 5700 struct tgsi_full_dst_register pos_dst_xyz = 5701 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); 5702 struct tgsi_full_dst_register pos_dst_w = 5703 writemask_dst(&pos_dst, TGSI_WRITEMASK_W); 5704 5705 struct tgsi_full_src_register vp_xyzw = 5706 make_src_const_reg(emit->vs.viewport_index); 5707 struct tgsi_full_src_register vp_zwww = 5708 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, 5709 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 5710 5711 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ 5712 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, 5713 &tmp_pos_src, &vp_zwww, FALSE); 5714 5715 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ 5716 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, 5717 &tmp_pos_src, &vp_xyzw, FALSE); 5718 5719 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ 5720 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, 5721 &tmp_pos_src, &tmp_pos_src_wwww, FALSE); 5722 5723 /* MOV pos.w, tmp_pos.w */ 5724 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, 5725 &tmp_pos_src, FALSE); 5726 } 5727 else if (vs_pos_tmp_index != INVALID_INDEX) { 5728 /* This code is to handle the case where the temporary vertex 5729 * position register is created when the vertex shader has stream 5730 * output and prescale is disabled because rasterization is to be 5731 * discarded. 
5732 */ 5733 struct tgsi_full_dst_register pos_dst = 5734 make_dst_output_reg(emit->vposition.out_index); 5735 5736 /* MOV pos, tmp_pos */ 5737 begin_emit_instruction(emit); 5738 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 5739 emit_dst_register(emit, &pos_dst); 5740 emit_src_register(emit, &tmp_pos_src); 5741 end_emit_instruction(emit); 5742 } 5743 } 5744 5745 static void 5746 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) 5747 { 5748 if (emit->clip_mode == CLIP_DISTANCE) { 5749 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ 5750 emit_clip_distance_instructions(emit); 5751 5752 } else if (emit->clip_mode == CLIP_VERTEX) { 5753 /* Convert TGSI CLIPVERTEX to CLIPDIST */ 5754 emit_clip_vertex_instructions(emit); 5755 } 5756 5757 /** 5758 * Emit vertex position and take care of legacy user planes only if 5759 * there is a valid vertex position register index. 5760 * This is to take care of the case 5761 * where the shader doesn't output vertex position. Then in 5762 * this case, don't bother to emit more vertex instructions. 5763 */ 5764 if (emit->vposition.out_index == INVALID_INDEX) 5765 return; 5766 5767 /** 5768 * Emit per-vertex clipping instructions for legacy user defined clip planes. 5769 * NOTE: we must emit the clip distance instructions before the 5770 * emit_vpos_instructions() call since the later function will change 5771 * the TEMP[vs_pos_tmp_index] value. 5772 */ 5773 if (emit->clip_mode == CLIP_LEGACY) { 5774 /* Emit CLIPDIST for legacy user defined clip planes */ 5775 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); 5776 } 5777 } 5778 5779 5780 /** 5781 * Emit extra per-vertex instructions. This includes clip-coordinate 5782 * space conversion and computing clip distances. This is called for 5783 * each GS emit-vertex instruction and at the end of VS translation. 
5784 */ 5785 static void 5786 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) 5787 { 5788 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; 5789 5790 /* Emit clipping instructions based on clipping mode */ 5791 emit_clipping_instructions(emit); 5792 5793 /** 5794 * Reset the temporary vertex position register index 5795 * so that emit_dst_register() will use the real vertex position output 5796 */ 5797 emit->vposition.tmp_index = INVALID_INDEX; 5798 5799 /* Emit vertex position instructions */ 5800 emit_vpos_instructions(emit, vs_pos_tmp_index); 5801 5802 /* Restore original vposition.tmp_index value for the next GS vertex. 5803 * It doesn't matter for VS. 5804 */ 5805 emit->vposition.tmp_index = vs_pos_tmp_index; 5806 } 5807 5808 /** 5809 * Translate the TGSI_OPCODE_EMIT GS instruction. 5810 */ 5811 static boolean 5812 emit_vertex(struct svga_shader_emitter_v10 *emit, 5813 const struct tgsi_full_instruction *inst) 5814 { 5815 unsigned ret = TRUE; 5816 5817 assert(emit->unit == PIPE_SHADER_GEOMETRY); 5818 5819 emit_vertex_instructions(emit); 5820 5821 /* We can't use emit_simple() because the TGSI instruction has one 5822 * operand (vertex stream number) which we must ignore for VGPU10. 5823 */ 5824 begin_emit_instruction(emit); 5825 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); 5826 end_emit_instruction(emit); 5827 5828 return ret; 5829 } 5830 5831 5832 /** 5833 * Emit the extra code to convert from VGPU10's boolean front-face 5834 * register to TGSI's signed front-face register. 5835 * 5836 * TODO: Make temporary front-face register a scalar. 
5837 */ 5838 static void 5839 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) 5840 { 5841 assert(emit->unit == PIPE_SHADER_FRAGMENT); 5842 5843 if (emit->fs.face_input_index != INVALID_INDEX) { 5844 /* convert vgpu10 boolean face register to gallium +/-1 value */ 5845 struct tgsi_full_dst_register tmp_dst = 5846 make_dst_temp_reg(emit->fs.face_tmp_index); 5847 struct tgsi_full_src_register one = 5848 make_immediate_reg_float(emit, 1.0f); 5849 struct tgsi_full_src_register neg_one = 5850 make_immediate_reg_float(emit, -1.0f); 5851 5852 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ 5853 begin_emit_instruction(emit); 5854 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); 5855 emit_dst_register(emit, &tmp_dst); 5856 emit_face_register(emit); 5857 emit_src_register(emit, &one); 5858 emit_src_register(emit, &neg_one); 5859 end_emit_instruction(emit); 5860 } 5861 } 5862 5863 5864 /** 5865 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w. 5866 */ 5867 static void 5868 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) 5869 { 5870 assert(emit->unit == PIPE_SHADER_FRAGMENT); 5871 5872 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 5873 struct tgsi_full_dst_register tmp_dst = 5874 make_dst_temp_reg(emit->fs.fragcoord_tmp_index); 5875 struct tgsi_full_dst_register tmp_dst_xyz = 5876 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ); 5877 struct tgsi_full_dst_register tmp_dst_w = 5878 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 5879 struct tgsi_full_src_register one = 5880 make_immediate_reg_float(emit, 1.0f); 5881 struct tgsi_full_src_register fragcoord = 5882 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index); 5883 5884 /* save the input index */ 5885 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index; 5886 /* set to invalid to prevent substitution in emit_src_register() */ 5887 emit->fs.fragcoord_input_index = INVALID_INDEX; 5888 5889 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ 5890 
begin_emit_instruction(emit); 5891 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 5892 emit_dst_register(emit, &tmp_dst_xyz); 5893 emit_src_register(emit, &fragcoord); 5894 end_emit_instruction(emit); 5895 5896 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ 5897 begin_emit_instruction(emit); 5898 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); 5899 emit_dst_register(emit, &tmp_dst_w); 5900 emit_src_register(emit, &one); 5901 emit_src_register(emit, &fragcoord); 5902 end_emit_instruction(emit); 5903 5904 /* restore saved value */ 5905 emit->fs.fragcoord_input_index = fragcoord_input_index; 5906 } 5907 } 5908 5909 5910 /** 5911 * Emit extra instructions to adjust VS inputs/attributes. This can 5912 * mean casting a vertex attribute from int to float or setting the 5913 * W component to 1, or both. 5914 */ 5915 static void 5916 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) 5917 { 5918 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1; 5919 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof; 5920 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof; 5921 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra; 5922 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm; 5923 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled; 5924 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled; 5925 5926 unsigned adjust_mask = (save_w_1_mask | 5927 save_itof_mask | 5928 save_utof_mask | 5929 save_is_bgra_mask | 5930 save_puint_to_snorm_mask | 5931 save_puint_to_uscaled_mask | 5932 save_puint_to_sscaled_mask); 5933 5934 assert(emit->unit == PIPE_SHADER_VERTEX); 5935 5936 if (adjust_mask) { 5937 struct tgsi_full_src_register one = 5938 make_immediate_reg_float(emit, 1.0f); 5939 5940 struct tgsi_full_src_register one_int = 5941 make_immediate_reg_int(emit, 1); 5942 5943 /* We need to turn off these bitmasks while emitting the 5944 * instructions 
below, then restore them afterward.
       */
      emit->key.vs.adjust_attrib_w_1 = 0;
      emit->key.vs.adjust_attrib_itof = 0;
      emit->key.vs.adjust_attrib_utof = 0;
      emit->key.vs.attrib_is_bgra = 0;
      emit->key.vs.attrib_puint_to_snorm = 0;
      emit->key.vs.attrib_puint_to_uscaled = 0;
      emit->key.vs.attrib_puint_to_sscaled = 0;

      /* Emit fix-up code for each attribute that needs it.  u_bit_scan()
       * pops/returns the lowest set bit of the mask on each iteration.
       */
      while (adjust_mask) {
         unsigned index = u_bit_scan(&adjust_mask);

         /* skip the instruction if this vertex attribute is not being used */
         if (emit->info.input_usage_mask[index] == 0)
            continue;

         unsigned tmp = emit->vs.adjusted_input[index];
         struct tgsi_full_src_register input_src =
            make_src_reg(TGSI_FILE_INPUT, index);

         struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
         struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
         struct tgsi_full_dst_register tmp_dst_w =
            writemask_dst(&tmp_dst, TGSI_WRITEMASK_W);

         /* ITOF/UTOF/MOV tmp, input[index] */
         if (save_itof_mask & (1 << index)) {
            /* signed int attribute -> float */
            emit_instruction_op1(emit, VGPU10_OPCODE_ITOF,
                                 &tmp_dst, &input_src, FALSE);
         }
         else if (save_utof_mask & (1 << index)) {
            /* unsigned int attribute -> float */
            emit_instruction_op1(emit, VGPU10_OPCODE_UTOF,
                                 &tmp_dst, &input_src, FALSE);
         }
         else if (save_puint_to_snorm_mask & (1 << index)) {
            emit_puint_to_snorm(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_uscaled_mask & (1 << index)) {
            emit_puint_to_uscaled(emit, &tmp_dst, &input_src);
         }
         else if (save_puint_to_sscaled_mask & (1 << index)) {
            emit_puint_to_sscaled(emit, &tmp_dst, &input_src);
         }
         else {
            /* no format conversion needed; only the w=1 and/or BGRA
             * swizzle fix-ups below apply, so just copy the attribute.
             */
            assert((save_w_1_mask | save_is_bgra_mask) & (1 << index));
            emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                 &tmp_dst, &input_src, FALSE);
         }

         if (save_is_bgra_mask & (1 << index)) {
            /* swap the red/blue channels */
            emit_swap_r_b(emit, &tmp_dst, &tmp_src);
         }

         if (save_w_1_mask & (1 << index)) {
            /* MOV tmp.w, 1.0 */
            if (emit->key.vs.attrib_is_pure_int & (1 << index)) {
               /* pure-integer attribute: write integer 1, not 1.0f */
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one_int, FALSE);
            }
            else {
               emit_instruction_op1(emit, VGPU10_OPCODE_MOV,
                                    &tmp_dst_w, &one, FALSE);
            }
         }
      }

      /* restore the key fields that were zeroed out above */
      emit->key.vs.adjust_attrib_w_1 = save_w_1_mask;
      emit->key.vs.adjust_attrib_itof = save_itof_mask;
      emit->key.vs.adjust_attrib_utof = save_utof_mask;
      emit->key.vs.attrib_is_bgra = save_is_bgra_mask;
      emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask;
      emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask;
      emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask;
   }
}


/**
 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed
 * to implement some instructions. We pre-allocate those values here
 * in the immediate constant buffer.
 * The allocated positions are remembered in emit->common_immediate_pos[]
 * so later code can reference them.
 */
static void
alloc_common_immediates(struct svga_shader_emitter_v10 *emit)
{
   unsigned n = 0;

   /* float4 {0, 1, 0.5, -1}: the most frequently used float values */
   emit->common_immediate_pos[n++] =
      alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f);

   if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) {
      /* extra constants only needed when the shader uses TGSI LIT
       * (presumably the +/-128 specular-exponent clamp — see the LIT
       * emit code to confirm)
       */
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f);
   }

   /* int4 {0, 1, 0, -1}: common integer values */
   emit->common_immediate_pos[n++] =
      alloc_immediate_int4(emit, 0, 1, 0, -1);

   /* Constants used by the packed-uint attribute conversion helpers.
    * Only allocated when the corresponding conversion is enabled in the key.
    */
   if (emit->key.vs.attrib_puint_to_snorm) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f);
   }

   if (emit->key.vs.attrib_puint_to_uscaled) {
      emit->common_immediate_pos[n++] =
         alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f);
   }

   if (emit->key.vs.attrib_puint_to_sscaled) {
      /* presumably bit-shift amounts for unpacking 10_10_10_2 data —
       * confirm against emit_puint_to_sscaled()
       */
      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 12, 2, 0);

      emit->common_immediate_pos[n++] =
         alloc_immediate_int4(emit, 22, 30, 0, 0);
   }

   unsigned i;

   /* One shared texel-bias constant if any sampler needs it */
   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
      if (emit->key.tex[i].texel_bias) {
         /* Replace 0.0f if more immediate float value is needed */
         emit->common_immediate_pos[n++] =
            alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f);
         break;
      }
   }

   assert(n <= ARRAY_SIZE(emit->common_immediate_pos));
   emit->num_common_immediates = n;
}


/**
 * Emit any extra/helper declarations/code that we might need between
 * the declaration section and code section.
 * \return FALSE if any of the declaration emitters failed
 */
static boolean
emit_pre_helpers(struct svga_shader_emitter_v10 *emit)
{
   /* Properties */
   if (emit->unit == PIPE_SHADER_GEOMETRY)
      emit_property_instructions(emit);

   /* Declare inputs */
   if (!emit_input_declarations(emit))
      return FALSE;

   /* Declare outputs */
   if (!emit_output_declarations(emit))
      return FALSE;

   /* Declare temporary registers */
   emit_temporaries_declaration(emit);

   /* Declare constant registers */
   emit_constant_declaration(emit);

   /* Declare samplers and resources */
   emit_sampler_declarations(emit);
   emit_resource_declarations(emit);

   /* Declare clip distance output registers */
   if (emit->unit == PIPE_SHADER_VERTEX ||
       emit->unit == PIPE_SHADER_GEOMETRY) {
      emit_clip_distance_declarations(emit);
   }

   alloc_common_immediates(emit);

   /* Allocate the alpha-test reference value (replicated to all four
    * channels) if the fragment shader needs alpha testing.
    */
   if (emit->unit == PIPE_SHADER_FRAGMENT &&
       emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
      float alpha = emit->key.fs.alpha_ref;
      emit->fs.alpha_ref_index =
         alloc_immediate_float4(emit, alpha, alpha, alpha, alpha);
   }

   /* Now, emit the constant block containing all the immediates
    * declared by shader, as well as the extra ones seen above.
    */
   emit_vgpu10_immediates_block(emit);

   if (emit->unit == PIPE_SHADER_FRAGMENT) {
      /* helper code for the front-face and fragment-coord system values */
      emit_frontface_instructions(emit);
      emit_fragcoord_instructions(emit);
   }
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      /* helper code that fixes up vertex attribute formats */
      emit_vertex_attrib_instructions(emit);
   }

   return TRUE;
}


/**
 * The device has no direct support for the pipe_blend_state::alpha_to_one
 * option so we implement it here with shader code.
 *
 * Note that this is kind of pointless, actually.  Here we're clobbering
 * the alpha value with 1.0.  So if alpha-to-coverage is enabled, we'll wind
 * up with 100% coverage.  That's almost certainly not what the user wants.
 * The work-around is to add extra shader code to compute coverage from alpha
 * and write it to the coverage output register (if the user's shader doesn't
 * do so already).  We'll probably do that in the future.
 *
 * \param fs_color_tmp_index  temp register holding color output 0, or
 *                            INVALID_INDEX if output 0 is written directly
 */
static void
emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit,
                               unsigned fs_color_tmp_index)
{
   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
   unsigned i;

   /* Note: it's not 100% clear from the spec if we're supposed to clobber
    * the alpha for all render targets.  But that's what NVIDIA does and
    * that's what Piglit tests.
    */
   for (i = 0; i < emit->fs.num_color_outputs; i++) {
      struct tgsi_full_dst_register color_dst;

      if (fs_color_tmp_index != INVALID_INDEX && i == 0) {
         /* write to the temp color register */
         color_dst = make_dst_temp_reg(fs_color_tmp_index);
      }
      else {
         /* write directly to the color[i] output */
         color_dst = make_dst_output_reg(emit->fs.color_out_index[i]);
      }

      /* only the alpha (W) channel is overwritten */
      color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W);

      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one, FALSE);
   }
}


/**
 * Emit alpha test code.
This compares TEMP[fs_color_tmp_index].w
 * against the alpha reference value and discards the fragment if the
 * comparison fails.
 * \param fs_color_tmp_index  temp register holding the fragment color
 */
static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
                             unsigned fs_color_tmp_index)
{
   /* compare output color's alpha to alpha ref and kill */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src_x =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register color_src =
      make_src_temp_reg(fs_color_tmp_index);
   struct tgsi_full_src_register color_src_w =
      scalar_src(&color_src, TGSI_SWIZZLE_W);
   struct tgsi_full_src_register ref_src =
      make_src_immediate_reg(emit->fs.alpha_ref_index);
   struct tgsi_full_dst_register color_dst =
      make_dst_output_reg(emit->fs.color_out_index[0]);

   /* alpha testing only applies to fragment shaders */
   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   /* dst = src0 'alpha_func' src1 */
   emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
                   &color_src_w, &ref_src);

   /* DISCARD if dst.x == 0 */
   begin_emit_instruction(emit);
   emit_discard_opcode(emit, FALSE); /* discard if src0.x is zero */
   emit_src_register(emit, &tmp_src_x);
   end_emit_instruction(emit);

   /* If we don't need to broadcast the color below, emit the final color here.
    */
   if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
      /* MOV output.color, tempcolor */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
                           &color_src, FALSE); /* XXX saturate? */
   }

   free_temp_indexes(emit);
}


/**
 * Emit instructions for writing a single color output to multiple
 * color buffers.
 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS (or
 * when key.fs.white_fragments is true).
 * property is set and the number of render targets is greater than one.
 * \param fs_color_tmp_index  index of the temp register that holds the
 *                            color to broadcast.
 */
static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
                                  unsigned fs_color_tmp_index)
{
   const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
   unsigned i;
   struct tgsi_full_src_register color_src;

   if (emit->key.fs.white_fragments) {
      /* set all color outputs to white */
      color_src = make_immediate_reg_float(emit, 1.0f);
   }
   else {
      /* set all color outputs to TEMP[fs_color_tmp_index] */
      assert(fs_color_tmp_index != INVALID_INDEX);
      color_src = make_src_temp_reg(fs_color_tmp_index);
   }

   /* color broadcasting only applies to fragment shaders */
   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   for (i = 0; i < n; i++) {
      unsigned output_reg = emit->fs.color_out_index[i];
      struct tgsi_full_dst_register color_dst =
         make_dst_output_reg(output_reg);

      /* Fill in this semantic here since we'll use it later in
       * emit_dst_register().
       */
      emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;

      /* MOV output.color[i], tempcolor */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
                           &color_src, FALSE); /* XXX saturate? */
   }
}


/**
 * Emit extra helper code after the original shader code, but before the
 * last END/RET instruction.
 * For vertex shaders this means emitting the extra code to apply the
 * prescale scale/translation.
 */
static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit)
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      emit_vertex_instructions(emit);
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;

      /* white_fragments implies we broadcast to at least one cbuf */
      assert(!(emit->key.fs.white_fragments &&
               emit->key.fs.write_color0_to_n_cbufs == 0));

      /* We no longer want emit_dst_register() to substitute the
       * temporary fragment color register for the real color output.
       */
      emit->fs.color_tmp_index = INVALID_INDEX;

      /* Note: ordering matters here — alpha-to-one, then alpha test,
       * then broadcasting the single color to all color buffers.
       */
      if (emit->key.fs.alpha_to_one) {
         emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
      }
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
         emit_alpha_test_instructions(emit, fs_color_tmp_index);
      }
      if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
          emit->key.fs.white_fragments) {
         emit_broadcast_color_instructions(emit, fs_color_tmp_index);
      }
   }

   return TRUE;
}


/**
 * Translate the TGSI tokens into VGPU10 tokens.
 */
static boolean
emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
                         const struct tgsi_token *tokens)
{
   struct tgsi_parse_context parse;
   boolean ret = TRUE;
   boolean pre_helpers_emitted = FALSE;
   unsigned inst_number = 0;

   tgsi_parse_init(&parse, tokens);

   /* Walk the TGSI token stream, dispatching on token type */
   while (!tgsi_parse_end_of_tokens(&parse)) {
      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE:
         ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_DECLARATION:
         ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* Emit the extra declarations/helper code exactly once, just
          * before the first real instruction.
          */
         if (!pre_helpers_emitted) {
            ret = emit_pre_helpers(emit);
            if (!ret)
               goto done;
            pre_helpers_emitted = TRUE;
         }
         ret = emit_vgpu10_instruction(emit, inst_number++,
                                       &parse.FullToken.FullInstruction);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
         if (!ret)
            goto done;
         break;

      default:
         /* ignore unknown token types */
         break;
      }
   }

done:
   tgsi_parse_free(&parse);
   return ret;
}


/**
 * Emit the first VGPU10 shader tokens.
 */
static boolean
emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken ptoken;

   /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
   ptoken.majorVersion = 4;
   ptoken.minorVersion = 0;
   ptoken.programType = translate_shader_type(emit->unit);
   if (!emit_dword(emit, ptoken.value))
      return FALSE;

   /* Second token: total length of shader, in tokens.  We can't fill this
    * in until we're all done.  Emit zero for now.
    */
   return emit_dword(emit, 0);
}


/**
 * Patch up the shader-length placeholder token that was emitted by
 * emit_vgpu10_header() now that the total token count is known.
 */
static boolean
emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken *tokens;

   /* Replace the second token with total shader length */
   tokens = (VGPU10ProgramToken *) emit->buf;
   tokens[1].value = emit_get_num_tokens(emit);

   return TRUE;
}


/**
 * Modify the FS to read the BCOLORs and use the FACE register
 * to choose between the front/back colors.
 * \return a new token string (caller frees); the input tokens are untouched
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens,0);
   }
   tokens = tgsi_add_two_side(tokens);
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}


/**
 * Modify the FS to do polygon stipple.
 * Also records the sampler unit used for the stipple texture and sets up
 * its swizzle state in the compile key.
 * \return a new token string (caller frees); the input tokens are untouched
 */
static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_token *tokens)
{
   const struct tgsi_token *new_tokens;
   unsigned unit;

   if (0) {
      debug_printf("Before pstipple ------------------\n");
      tgsi_dump(tokens,0);
   }

   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
                                                     TGSI_FILE_INPUT);

   /* remember which sampler unit the stipple texture occupies */
   emit->fs.pstipple_sampler_unit = unit;

   /* Setup texture state for stipple */
   emit->sampler_target[unit] = TGSI_TEXTURE_2D;
   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;

   if (0) {
      debug_printf("After pstipple ------------------\n");
      tgsi_dump(new_tokens, 0);
   }

   return new_tokens;
}

/**
 * Modify the FS to support anti-aliasing point.
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens,0);
   }
   /* returns a new token string; caller is responsible for freeing it */
   tokens = tgsi_add_aa_point(tokens, aa_coord_index);
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}

/**
 * This is the main entrypoint for the TGSI -> VGPU10 translator.
 * \param svga  the context (used for linking against the bound VS/GS)
 * \param shader  the shader being compiled
 * \param key  the compile key describing the required variant
 * \param unit  one of PIPE_SHADER_VERTEX/GEOMETRY/FRAGMENT
 * \return the new shader variant, or NULL on failure
 */
struct svga_shader_variant *
svga_tgsi_vgpu10_translate(struct svga_context *svga,
                           const struct svga_shader *shader,
                           const struct svga_compile_key *key,
                           unsigned unit)
{
   struct svga_shader_variant *variant = NULL;
   struct svga_shader_emitter_v10 *emit;
   const struct tgsi_token *tokens = shader->tokens;
   struct svga_vertex_shader *vs = svga->curr.vs;
   struct svga_geometry_shader *gs = svga->curr.gs;

   assert(unit == PIPE_SHADER_VERTEX ||
          unit == PIPE_SHADER_GEOMETRY ||
          unit == PIPE_SHADER_FRAGMENT);

   /* These two flags cannot be used together */
   assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
   /*
    * Setup the code emitter
    */
   emit = alloc_emitter();
   if (!emit)
      goto done;

   emit->unit = unit;
   emit->key = *key;

   emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
                                    emit->key.gs.need_prescale);

   /* Mark all special-purpose register indexes as "not allocated yet" */
   emit->vposition.tmp_index = INVALID_INDEX;
   emit->vposition.so_index = INVALID_INDEX;
   emit->vposition.out_index = INVALID_INDEX;

   emit->fs.color_tmp_index = INVALID_INDEX;
   emit->fs.face_input_index = INVALID_INDEX;
   emit->fs.fragcoord_input_index = INVALID_INDEX;

   emit->gs.prim_id_index = INVALID_INDEX;

   emit->clip_dist_out_index = INVALID_INDEX;
   emit->clip_dist_tmp_index = INVALID_INDEX;
   emit->clip_dist_so_index = INVALID_INDEX;
   emit->clip_vertex_out_index = INVALID_INDEX;

   /* normalize "invalid" to "always pass" so later tests are simpler */
   if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
      emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
   }

   /* Apply TGSI-level source transformations required by the key.
    * Each transform returns a new token string; intermediate strings
    * (anything other than shader->tokens) must be freed as we go.
    */
   if (unit == PIPE_SHADER_FRAGMENT) {
      if (key->fs.light_twoside) {
         tokens = transform_fs_twoside(tokens);
      }
      if (key->fs.pstipple) {
         const struct tgsi_token *new_tokens =
            transform_fs_pstipple(emit, tokens);
         if (tokens != shader->tokens) {
            /* free the two-sided shader tokens */
            tgsi_free_tokens(tokens);
         }
         tokens = new_tokens;
      }
      if (key->fs.aa_point) {
         tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
      }
   }

   if (SVGA_DEBUG & DEBUG_TGSI) {
      debug_printf("#####################################\n");
      debug_printf("### TGSI Shader %u\n", shader->id);
      tgsi_dump(tokens, 0);
   }

   /**
    * Rescan the header if the token string is different from the one
    * included in the shader; otherwise, the header info is already up-to-date
    */
   if (tokens != shader->tokens) {
      tgsi_scan_shader(tokens, &emit->info);
   } else {
      emit->info = shader->info;
   }

   emit->num_outputs = emit->info.num_outputs;

   if (unit == PIPE_SHADER_FRAGMENT) {
      /* Compute FS input remapping to match the output from VS/GS */
      if (gs) {
         svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage);
      } else {
         assert(vs);
         svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
      }
   } else if (unit == PIPE_SHADER_GEOMETRY) {
      assert(vs);
      svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage);
   }

   determine_clipping_mode(emit);

   if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) {
      if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
         /* if there is stream output declarations associated
          * with this shader or the shader writes to ClipDistance
          * then reserve extra registers for the non-adjusted vertex position
          * and the ClipDistance shadow copy
          */
         emit->vposition.so_index = emit->num_outputs++;

         if (emit->clip_mode == CLIP_DISTANCE) {
            emit->clip_dist_so_index = emit->num_outputs++;
            /* two output registers needed when more than 4 clip distances
             * are written
             */
            if (emit->info.num_written_clipdistance > 4)
               emit->num_outputs++;
         }
      }
   }

   /*
    * Do actual shader translation.
    */
   if (!emit_vgpu10_header(emit)) {
      debug_printf("svga: emit VGPU10 header failed\n");
      goto cleanup;
   }

   if (!emit_vgpu10_instructions(emit, tokens)) {
      debug_printf("svga: emit VGPU10 instructions failed\n");
      goto cleanup;
   }

   if (!emit_vgpu10_tail(emit)) {
      debug_printf("svga: emit VGPU10 tail failed\n");
      goto cleanup;
   }

   if (emit->register_overflow) {
      /* shader exceeded some device register limit; fail the compile */
      goto cleanup;
   }

   /*
    * Create, initialize the 'variant' object.
    */
   variant = svga_new_shader_variant(svga);
   if (!variant)
      goto cleanup;

   variant->shader = shader;
   variant->nr_tokens = emit_get_num_tokens(emit);
   variant->tokens = (const unsigned *)emit->buf;
   emit->buf = NULL; /* buffer is no longer owned by emitter context */
   memcpy(&variant->key, key, sizeof(*key));
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   /* The extra constant starting offset starts with the number of
    * shader constants declared in the shader.
    */
   variant->extra_const_start = emit->num_shader_consts[0];
   if (key->gs.wide_point) {
      /**
       * The extra constant added in the transformed shader
       * for inverse viewport scale is to be supplied by the driver.
       * So the extra constant starting offset needs to be reduced by 1.
       */
      assert(variant->extra_const_start > 0);
      variant->extra_const_start--;
   }

   variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;

   /* If there was exactly one write to a fragment shader output register
    * and it came from a constant buffer, we know all fragments will have
    * the same color (except for blending).
    */
   variant->constant_color_output =
      emit->constant_color_output && emit->num_output_writes == 1;

   /** keep track in the variant if flat interpolation is used
    *  for any of the varyings.
    */
   variant->uses_flat_interp = emit->uses_flat_interp;

   variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;

   if (tokens != shader->tokens) {
      /* free the transformed token string */
      tgsi_free_tokens(tokens);
   }

cleanup:
   free_emitter(emit);

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return variant;
}