/**********************************************************
 * Copyright 1998-2013 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

/**
 * @file svga_tgsi_vgpu10.c
 *
 * TGSI -> VGPU10 shader translation.
 *
 * \author Mingcheng Chen
 * \author Brian Paul
 */

#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_defines.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_two_side.h"
#include "tgsi/tgsi_aa_point.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_bitmask.h"
#include "util/u_debug.h"
#include "util/u_pstipple.h"

#include "svga_context.h"
#include "svga_debug.h"
#include "svga_link.h"
#include "svga_shader.h"
#include "svga_tgsi.h"

#include "VGPU10ShaderTokens.h"


#define INVALID_INDEX 99999
#define MAX_INTERNAL_TEMPS 3
#define MAX_SYSTEM_VALUES 4
#define MAX_IMMEDIATE_COUNT \
        (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
#define MAX_TEMP_ARRAYS 64  /* Enough? */


/**
 * Clipping is complicated.  There are four different cases which we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   CLIP_NONE,     /**< No clipping enabled */
   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
                   * one or more user-defined clip planes are enabled.  We
                   * generate extra code to emit clip distances.
                   */
   CLIP_DISTANCE, /**< The shader already declares clip distance output
                   * registers and has code to write to them.
                   */
   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
                   * has code that writes to the register.  We convert the
                   * clipvertex position into one or more clip distances.
                   */
};


struct svga_shader_emitter_v10
{
   /* The token output buffer */
   unsigned size;
   char *buf;
   char *ptr;

   /* Information about the shader and state (does not change) */
   struct svga_compile_key key;
   struct tgsi_shader_info info;
   unsigned unit;

   unsigned inst_start_token;
   boolean discard_instruction; /**< throw away current instruction? */

   union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4];
   unsigned num_immediates;      /**< Number of immediates emitted */
   unsigned common_immediate_pos[8];  /**< literals for common immediates */
   unsigned num_common_immediates;
   boolean immediates_emitted;

   unsigned num_outputs;      /**< include any extra outputs */
                              /** The first extra output is reserved for
                               *  the non-adjusted vertex position for
                               *  stream output purposes.
                               */

   /* Temporary Registers */
   unsigned num_shader_temps; /**< num of temps used by original shader */
   unsigned internal_temp_count;  /**< currently allocated internal temps */
   struct {
      unsigned start, size;
   } temp_arrays[MAX_TEMP_ARRAYS];
   unsigned num_temp_arrays;

   /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */
   struct {
      unsigned arrayId, index;
   } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */

   /** Number of constants used by the original shader for each constant
    * buffer.  The size should probably always match that of
    * svga_state.constbufs.
    */
   unsigned num_shader_consts[SVGA_MAX_CONST_BUFS];

   /* Samplers */
   unsigned num_samplers;
   ubyte sampler_target[PIPE_MAX_SAMPLERS];  /**< TGSI_TEXTURE_x */
   ubyte sampler_return_type[PIPE_MAX_SAMPLERS];  /**< TGSI_RETURN_TYPE_x */

   /* Address regs (really implemented with temps) */
   unsigned num_address_regs;
   unsigned address_reg_index[MAX_VGPU10_ADDR_REGS];

   /* Output register usage masks */
   ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS];

   /* To map TGSI system value index to VGPU shader input indexes */
   ubyte system_value_indexes[MAX_SYSTEM_VALUES];

   struct {
      /* vertex position scale/translation */
      unsigned out_index;  /**< the real position output reg */
      unsigned tmp_index;  /**< the fake/temp position output reg */
      unsigned so_index;   /**< the non-adjusted position output reg */
      unsigned prescale_scale_index, prescale_trans_index;
      boolean need_prescale;
   } vposition;

   /* For vertex shaders only */
   struct {
      /* viewport constant */
      unsigned viewport_index;

      /* temp index of adjusted vertex attributes */
      unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS];
   } vs;

   /* For fragment shaders only */
   struct {
      /* alpha test */
      unsigned color_out_index[PIPE_MAX_COLOR_BUFS];  /**< the real color output regs */
      unsigned color_tmp_index;  /**< fake/temp color output reg */
      unsigned alpha_ref_index;  /**< immediate constant for alpha ref */

      /* front-face */
      unsigned face_input_index; /**< real fragment shader face reg (bool) */
      unsigned face_tmp_index;   /**< temp face reg converted to -1 / +1 */

      unsigned pstipple_sampler_unit;

      unsigned fragcoord_input_index;  /**< real fragment position input reg */
      unsigned fragcoord_tmp_index;    /**< 1/w modified position temp reg */
   } fs;

   /* For geometry shaders only */
   struct {
      VGPU10_PRIMITIVE prim_type;     /**< VGPU10 primitive type */
      VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */
      unsigned input_size;            /**< size of input arrays */
      unsigned prim_id_index;         /**< primitive id register index */
      unsigned max_out_vertices;      /**< maximum number of output vertices */
   } gs;

   /* For vertex or geometry shaders */
   enum clipping_mode clip_mode;
   unsigned clip_dist_out_index; /**< clip distance output register index */
   unsigned clip_dist_tmp_index; /**< clip distance temporary register */
   unsigned clip_dist_so_index;  /**< clip distance shadow copy */

   /** Index of temporary holding the clipvertex coordinate */
   unsigned clip_vertex_out_index; /**< clip vertex output register index */
   unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */

   /* user clip plane constant slot indexes */
   unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES];

   unsigned num_output_writes;
   boolean constant_color_output;

   boolean uses_flat_interp;

   /* For all shaders: const reg index for RECT coord scaling */
   unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS];

   /* For all shaders: const reg index for texture buffer size */
   unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS];

   /* VS/GS/FS Linkage info */
   struct shader_linkage linkage;

   bool register_overflow;   /**< Set if we exceed a VGPU10 register limit */
};


static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit);

static boolean
emit_vertex(struct svga_shader_emitter_v10 *emit,
            const struct tgsi_full_instruction *inst);

static char err_buf[128];

/**
 * Double the size of the emitter's token output buffer.  On failure,
 * point the emitter at the small static error buffer and return FALSE.
 */
static boolean
expand(struct svga_shader_emitter_v10 *emit)
{
   char *new_buf;
   unsigned newsize = emit->size * 2;

   if (emit->buf != err_buf)
      new_buf = REALLOC(emit->buf, emit->size, newsize);
   else
      new_buf = NULL;

   if (!new_buf) {
      emit->ptr = err_buf;
      emit->buf = err_buf;
      emit->size = sizeof(err_buf);
      return FALSE;
   }

   emit->size = newsize;
   emit->ptr = new_buf + (emit->ptr - emit->buf);
   emit->buf = new_buf;
   return TRUE;
}

/**
 * Create and initialize a new svga_shader_emitter_v10 object.
 */
static struct svga_shader_emitter_v10 *
alloc_emitter(void)
{
   struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit));

   if (!emit)
      return NULL;

   /* to initialize the output buffer */
   emit->size = 512;
   if (!expand(emit)) {
      FREE(emit);
      return NULL;
   }
   return emit;
}

/**
 * Free an svga_shader_emitter_v10 object.
 */
static void
free_emitter(struct svga_shader_emitter_v10 *emit)
{
   assert(emit);
   FREE(emit->buf);    /* will be NULL if translation succeeded */
   FREE(emit);
}

/**
 * Make sure there is room for nr_dwords more dwords in the output buffer,
 * growing it if needed.
 */
static inline boolean
reserve(struct svga_shader_emitter_v10 *emit,
        unsigned nr_dwords)
{
   while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) {
      if (!expand(emit))
         return FALSE;
   }

   return TRUE;
}

/** Append one dword to the token output buffer */
static boolean
emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword)
{
   if (!reserve(emit, 1))
      return FALSE;

   *(uint32 *)emit->ptr = dword;
   emit->ptr += sizeof dword;
   return TRUE;
}

/** Append an array of dwords to the token output buffer */
static boolean
emit_dwords(struct svga_shader_emitter_v10 *emit,
            const uint32 *dwords,
            unsigned nr)
{
   if (!reserve(emit, nr))
      return FALSE;

   memcpy(emit->ptr, dwords, nr * sizeof *dwords);
   emit->ptr += nr * sizeof *dwords;
   return TRUE;
}

/** Return the number of tokens in the emitter's buffer */
static unsigned
emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit)
{
   return (emit->ptr - emit->buf) / sizeof(unsigned);
}


/**
 * Check for register overflow.  If we overflow we'll set an
 * error flag.
 * This function can be called for register declarations
 * or for use as src/dst instruction operands.
 * \param operandType  the register type.  One of VGPU10_OPERAND_TYPE_x
 *                     or VGPU10_OPCODE_DCL_x
 * \param index  the register index
 */
static void
check_register_index(struct svga_shader_emitter_v10 *emit,
                     unsigned operandType, unsigned index)
{
   bool overflow_before = emit->register_overflow;

   switch (operandType) {
   case VGPU10_OPERAND_TYPE_TEMP:
   case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP:
   case VGPU10_OPCODE_DCL_TEMPS:
      if (index >= VGPU10_MAX_TEMPS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER:
   case VGPU10_OPCODE_DCL_CONSTANT_BUFFER:
      if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_INPUT:
   case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID:
   case VGPU10_OPCODE_DCL_INPUT:
   case VGPU10_OPCODE_DCL_INPUT_SGV:
   case VGPU10_OPCODE_DCL_INPUT_SIV:
   case VGPU10_OPCODE_DCL_INPUT_PS:
   case VGPU10_OPCODE_DCL_INPUT_PS_SGV:
   case VGPU10_OPCODE_DCL_INPUT_PS_SIV:
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_INPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_INPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_INPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT:
   case VGPU10_OPCODE_DCL_OUTPUT_SGV:
   case VGPU10_OPCODE_DCL_OUTPUT_SIV:
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_OUTPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_SAMPLER:
   case VGPU10_OPCODE_DCL_SAMPLER:
      if (index >= VGPU10_MAX_SAMPLERS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_RESOURCE:
   case VGPU10_OPCODE_DCL_RESOURCE:
      if (index >= VGPU10_MAX_RESOURCES) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
      if (index >= MAX_IMMEDIATE_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   default:
      assert(0);
      ; /* nothing */
   }

   if (emit->register_overflow && !overflow_before) {
      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
                   operandType, index);
   }
}


/**
 * Examine misc state to determine the clipping mode.
 */
static void
determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
{
   if (emit->info.num_written_clipdistance > 0) {
      emit->clip_mode = CLIP_DISTANCE;
   }
   else if (emit->info.writes_clipvertex) {
      emit->clip_mode = CLIP_VERTEX;
   }
   else if (emit->key.clip_plane_enable) {
      emit->clip_mode = CLIP_LEGACY;
   }
   else {
      emit->clip_mode = CLIP_NONE;
   }
}


/**
 * For clip distance register declarations and clip distance register
 * writes we need to mask the declaration usage or instruction writemask
 * (respectively) against the set of clip planes that are actually enabled.
 *
 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
 * has a VS that writes to all 8 clip distance registers, but the plane enable
 * flags are a subset of that.
 *
 * This function is used to apply the plane enable flags to the register
 * declaration or instruction writemask.
 *
 * \param writemask  the declaration usage mask or instruction writemask
 * \param clip_reg_index  which clip plane register is being declared/written.
 *                        The legal values are 0 and 1 (four clip planes per
 *                        register, for a total of 8 clip planes).
 */
static unsigned
apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit,
                      unsigned writemask, unsigned clip_reg_index)
{
   unsigned shift;

   assert(clip_reg_index < 2);

   /* four clip planes per clip register: */
   shift = clip_reg_index * 4;
   writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf);

   return writemask;
}


/**
 * Translate gallium shader type into VGPU10 type.
 */
static VGPU10_PROGRAM_TYPE
translate_shader_type(unsigned type)
{
   switch (type) {
   case PIPE_SHADER_VERTEX:
      return VGPU10_VERTEX_SHADER;
   case PIPE_SHADER_GEOMETRY:
      return VGPU10_GEOMETRY_SHADER;
   case PIPE_SHADER_FRAGMENT:
      return VGPU10_PIXEL_SHADER;
   default:
      assert(!"Unexpected shader type");
      return VGPU10_VERTEX_SHADER;
   }
}


/**
 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x.
 * Note: we only need to translate the opcodes for "simple" instructions,
 * as seen below.  All other opcodes are handled/translated specially.
 */
static VGPU10_OPCODE_TYPE
translate_opcode(unsigned opcode)
{
   switch (opcode) {
   case TGSI_OPCODE_MOV:
      return VGPU10_OPCODE_MOV;
   case TGSI_OPCODE_MUL:
      return VGPU10_OPCODE_MUL;
   case TGSI_OPCODE_ADD:
      return VGPU10_OPCODE_ADD;
   case TGSI_OPCODE_DP3:
      return VGPU10_OPCODE_DP3;
   case TGSI_OPCODE_DP4:
      return VGPU10_OPCODE_DP4;
   case TGSI_OPCODE_MIN:
      return VGPU10_OPCODE_MIN;
   case TGSI_OPCODE_MAX:
      return VGPU10_OPCODE_MAX;
   case TGSI_OPCODE_MAD:
      return VGPU10_OPCODE_MAD;
   case TGSI_OPCODE_SQRT:
      return VGPU10_OPCODE_SQRT;
   case TGSI_OPCODE_FRC:
      return VGPU10_OPCODE_FRC;
   case TGSI_OPCODE_FLR:
      return VGPU10_OPCODE_ROUND_NI;
   case TGSI_OPCODE_FSEQ:
      return VGPU10_OPCODE_EQ;
   case TGSI_OPCODE_FSGE:
      return VGPU10_OPCODE_GE;
   case TGSI_OPCODE_FSNE:
      return VGPU10_OPCODE_NE;
   case TGSI_OPCODE_DDX:
      return VGPU10_OPCODE_DERIV_RTX;
   case TGSI_OPCODE_DDY:
      return VGPU10_OPCODE_DERIV_RTY;
   case TGSI_OPCODE_RET:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_DIV:
      return VGPU10_OPCODE_DIV;
   case TGSI_OPCODE_IDIV:
      return VGPU10_OPCODE_IDIV;
   case TGSI_OPCODE_DP2:
      return VGPU10_OPCODE_DP2;
   case TGSI_OPCODE_BRK:
      return VGPU10_OPCODE_BREAK;
   case TGSI_OPCODE_IF:
      return VGPU10_OPCODE_IF;
   case TGSI_OPCODE_ELSE:
      return VGPU10_OPCODE_ELSE;
   case TGSI_OPCODE_ENDIF:
      return VGPU10_OPCODE_ENDIF;
   case TGSI_OPCODE_CEIL:
      return VGPU10_OPCODE_ROUND_PI;
   case TGSI_OPCODE_I2F:
      return VGPU10_OPCODE_ITOF;
   case TGSI_OPCODE_NOT:
      return VGPU10_OPCODE_NOT;
   case TGSI_OPCODE_TRUNC:
      return VGPU10_OPCODE_ROUND_Z;
   case TGSI_OPCODE_SHL:
      return VGPU10_OPCODE_ISHL;
   case TGSI_OPCODE_AND:
      return VGPU10_OPCODE_AND;
   case TGSI_OPCODE_OR:
      return VGPU10_OPCODE_OR;
   case TGSI_OPCODE_XOR:
      return VGPU10_OPCODE_XOR;
   case TGSI_OPCODE_CONT:
      return VGPU10_OPCODE_CONTINUE;
   case TGSI_OPCODE_EMIT:
      return VGPU10_OPCODE_EMIT;
   case TGSI_OPCODE_ENDPRIM:
      return VGPU10_OPCODE_CUT;
   case TGSI_OPCODE_BGNLOOP:
      return VGPU10_OPCODE_LOOP;
   case TGSI_OPCODE_ENDLOOP:
      return VGPU10_OPCODE_ENDLOOP;
   case TGSI_OPCODE_ENDSUB:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_NOP:
      return VGPU10_OPCODE_NOP;
   case TGSI_OPCODE_BREAKC:
      return VGPU10_OPCODE_BREAKC;
   case TGSI_OPCODE_END:
      return VGPU10_OPCODE_RET;
   case TGSI_OPCODE_F2I:
      return VGPU10_OPCODE_FTOI;
   case TGSI_OPCODE_IMAX:
      return VGPU10_OPCODE_IMAX;
   case TGSI_OPCODE_IMIN:
      return VGPU10_OPCODE_IMIN;
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMOD:
   case TGSI_OPCODE_MOD:
      return VGPU10_OPCODE_UDIV;
   case TGSI_OPCODE_IMUL_HI:
      return VGPU10_OPCODE_IMUL;
   case TGSI_OPCODE_INEG:
      return VGPU10_OPCODE_INEG;
   case TGSI_OPCODE_ISHR:
      return VGPU10_OPCODE_ISHR;
   case TGSI_OPCODE_ISGE:
      return VGPU10_OPCODE_IGE;
   case TGSI_OPCODE_ISLT:
      return VGPU10_OPCODE_ILT;
   case TGSI_OPCODE_F2U:
      return VGPU10_OPCODE_FTOU;
   case TGSI_OPCODE_UADD:
      return VGPU10_OPCODE_IADD;
   case TGSI_OPCODE_U2F:
      return VGPU10_OPCODE_UTOF;
   case TGSI_OPCODE_UCMP:
      return VGPU10_OPCODE_MOVC;
   case TGSI_OPCODE_UMAD:
      return VGPU10_OPCODE_UMAD;
   case TGSI_OPCODE_UMAX:
      return VGPU10_OPCODE_UMAX;
   case TGSI_OPCODE_UMIN:
      return VGPU10_OPCODE_UMIN;
   case TGSI_OPCODE_UMUL:
   case TGSI_OPCODE_UMUL_HI:
      return VGPU10_OPCODE_UMUL;
   case TGSI_OPCODE_USEQ:
      return VGPU10_OPCODE_IEQ;
   case TGSI_OPCODE_USGE:
      return VGPU10_OPCODE_UGE;
   case TGSI_OPCODE_USHR:
      return VGPU10_OPCODE_USHR;
   case TGSI_OPCODE_USLT:
      return VGPU10_OPCODE_ULT;
   case TGSI_OPCODE_USNE:
      return VGPU10_OPCODE_INE;
   case TGSI_OPCODE_SWITCH:
      return VGPU10_OPCODE_SWITCH;
   case TGSI_OPCODE_CASE:
      return VGPU10_OPCODE_CASE;
   case TGSI_OPCODE_DEFAULT:
      return VGPU10_OPCODE_DEFAULT;
   case TGSI_OPCODE_ENDSWITCH:
      return VGPU10_OPCODE_ENDSWITCH;
   case TGSI_OPCODE_FSLT:
      return VGPU10_OPCODE_LT;
   case TGSI_OPCODE_ROUND:
      return VGPU10_OPCODE_ROUND_NE;
   default:
      assert(!"Unexpected TGSI opcode in translate_opcode()");
      return VGPU10_OPCODE_NOP;
   }
}


/**
 * Translate a TGSI register file type into a VGPU10 operand type.
 * \param array  is the TGSI_FILE_TEMPORARY register an array?
 */
static VGPU10_OPERAND_TYPE
translate_register_file(enum tgsi_file_type file, boolean array)
{
   switch (file) {
   case TGSI_FILE_CONSTANT:
      return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER;
   case TGSI_FILE_INPUT:
      return VGPU10_OPERAND_TYPE_INPUT;
   case TGSI_FILE_OUTPUT:
      return VGPU10_OPERAND_TYPE_OUTPUT;
   case TGSI_FILE_TEMPORARY:
      return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP
                   : VGPU10_OPERAND_TYPE_TEMP;
   case TGSI_FILE_IMMEDIATE:
      /* all immediates are 32-bit values at this time, so
       * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible.
       */
      return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER;
   case TGSI_FILE_SAMPLER:
      return VGPU10_OPERAND_TYPE_SAMPLER;
   case TGSI_FILE_SYSTEM_VALUE:
      return VGPU10_OPERAND_TYPE_INPUT;

   /* XXX TODO more cases to finish */

   default:
      assert(!"Bad tgsi register file!");
      return VGPU10_OPERAND_TYPE_NULL;
   }
}


/**
 * Emit a null dst register
 */
static void
emit_null_dst_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand;

   operand.value = 0;
   operand.operandType = VGPU10_OPERAND_TYPE_NULL;
   operand.numComponents = VGPU10_OPERAND_0_COMPONENT;

   emit_dword(emit, operand.value);
}


/**
 * If the given register is a temporary, return the array ID.
 * Else return zero.
 */
static unsigned
get_temp_array_id(const struct svga_shader_emitter_v10 *emit,
                  unsigned file, unsigned index)
{
   if (file == TGSI_FILE_TEMPORARY) {
      return emit->temp_map[index].arrayId;
   }
   else {
      return 0;
   }
}


/**
 * If the given register is a temporary, convert the index from a TGSI
 * TEMPORARY index to a VGPU10 temp index.
 */
static unsigned
remap_temp_index(const struct svga_shader_emitter_v10 *emit,
                 unsigned file, unsigned index)
{
   if (file == TGSI_FILE_TEMPORARY) {
      return emit->temp_map[index].index;
   }
   else {
      return index;
   }
}


/**
 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc).
 * Note: the operandType field must already be initialized.
 */
static VGPU10OperandToken0
setup_operand0_indexing(struct svga_shader_emitter_v10 *emit,
                        VGPU10OperandToken0 operand0,
                        unsigned file,
                        boolean indirect, boolean index2D,
                        unsigned tempArrayID)
{
   unsigned indexDim, index0Rep, index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   /*
    * Compute index dimensions
    */
   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 ||
       operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
      /* there's no swizzle for in-line immediates */
      indexDim = VGPU10_OPERAND_INDEX_0D;
      assert(operand0.selectionMode == 0);
   }
   else {
      if (index2D ||
          tempArrayID > 0 ||
          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
         indexDim = VGPU10_OPERAND_INDEX_2D;
      }
      else {
         indexDim = VGPU10_OPERAND_INDEX_1D;
      }
   }

   /*
    * Compute index representations (immediate, relative, etc).
    */
   if (tempArrayID > 0) {
      assert(file == TGSI_FILE_TEMPORARY);
      /* First index is the array ID, second index is the array element */
      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
      if (indirect) {
         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
      }
      else {
         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
      }
   }
   else if (indirect) {
      if (file == TGSI_FILE_CONSTANT) {
         /* index[0] indicates which constant buffer while index[1] indicates
          * the position in the constant buffer.
          */
         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
         index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
      }
      else {
         /* All other register files are 1-dimensional */
         index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE;
      }
   }
   else {
      index0Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
      index1Rep = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   }

   operand0.indexDimension = indexDim;
   operand0.index0Representation = index0Rep;
   operand0.index1Representation = index1Rep;

   return operand0;
}


/**
 * Emit the operand for expressing an address register for indirect indexing.
 * Note that the address register is really just a temp register.
 * \param addr_reg_index  which address register to use
 */
static void
emit_indirect_register(struct svga_shader_emitter_v10 *emit,
                       unsigned addr_reg_index)
{
   unsigned tmp_reg_index;
   VGPU10OperandToken0 operand0;

   assert(addr_reg_index < MAX_VGPU10_ADDR_REGS);

   tmp_reg_index = emit->address_reg_index[addr_reg_index];

   /* operand0 is a simple temporary register, selecting one component */
   operand0.value = 0;
   operand0.operandType = VGPU10_OPERAND_TYPE_TEMP;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   operand0.swizzleX = 0;
   operand0.swizzleY = 1;
   operand0.swizzleZ = 2;
   operand0.swizzleW = 3;

   emit_dword(emit, operand0.value);
   emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index));
}


/**
 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens.
 * \param emit  the emitter context
 * \param reg  the TGSI dst register to translate
 */
static void
emit_dst_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_dst_register *reg)
{
   unsigned file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const unsigned sem_name = emit->info.output_semantic_name[index];
   const unsigned sem_index = emit->info.output_semantic_index[index];
   unsigned writemask = reg->Register.WriteMask;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   VGPU10OperandToken0 operand0;

   if (file == TGSI_FILE_OUTPUT) {
      if (emit->unit == PIPE_SHADER_VERTEX ||
          emit->unit == PIPE_SHADER_GEOMETRY) {
         if (index == emit->vposition.out_index &&
             emit->vposition.tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[POS] with TEMP[POS].  We need to store the
             * vertex position result in a temporary so that we can modify
             * it in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->vposition.tmp_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                  emit->clip_dist_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
             * We store the clip distance in a temporary first, then
             * we'll copy it to the shadow copy and to CLIPDIST with the
             * enabled planes mask in emit_clip_distance_instructions().
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_dist_tmp_index + sem_index;
         }
         else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                  emit->clip_vertex_tmp_index != INVALID_INDEX) {
            /* replace the CLIPVERTEX output register with a temporary */
            assert(emit->clip_mode == CLIP_VERTEX);
            assert(sem_index == 0);
            file = TGSI_FILE_TEMPORARY;
            index = emit->clip_vertex_tmp_index;
         }
      }
      else if (emit->unit == PIPE_SHADER_FRAGMENT) {
         if (sem_name == TGSI_SEMANTIC_POSITION) {
            /* Fragment depth output register */
            operand0.value = 0;
            operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH;
            operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
            operand0.numComponents = VGPU10_OPERAND_1_COMPONENT;
            emit_dword(emit, operand0.value);
            return;
         }
         else if (index == emit->fs.color_out_index[0] &&
                  emit->fs.color_tmp_index != INVALID_INDEX) {
            /* replace OUTPUT[COLOR] with TEMP[COLOR].  We need to store the
             * fragment color result in a temporary so that we can read it
             * in the post_helper() code.
             */
            file = TGSI_FILE_TEMPORARY;
            index = emit->fs.color_tmp_index;
         }
         else {
            /* Typically, for fragment shaders, the output register index
             * matches the color semantic index.  But not when we write to
             * the fragment depth register.  In that case, OUT[0] will be
             * fragdepth and OUT[1] will be the 0th color output.  We need
             * to use the semantic index for color outputs.
             */
            assert(sem_name == TGSI_SEMANTIC_COLOR);
            index = emit->info.output_semantic_index[index];

            emit->num_output_writes++;
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to.  Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
    */
   STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X);
   operand0.mask = writemask;

   /* translate TGSI register file type to VGPU10 operand type */
   operand0.operandType = translate_register_file(file, tempArrayId > 0);

   check_register_index(emit, operand0.operandType, index);

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   /* Emit tokens */
   emit_dword(emit, operand0.value);
   if (tempArrayId > 0) {
      emit_dword(emit, tempArrayId);
   }

   emit_dword(emit, remap_temp_index(emit, file, index));

   if (indirect) {
      emit_indirect_register(emit, reg->Indirect.Index);
   }
}


/**
 * Translate a src register of a TGSI instruction and emit VGPU10 tokens.
 */
static void
emit_src_register(struct svga_shader_emitter_v10 *emit,
                  const struct tgsi_full_src_register *reg)
{
   unsigned file = reg->Register.File;
   unsigned index = reg->Register.Index;
   const unsigned indirect = reg->Register.Indirect;
   const unsigned tempArrayId = get_temp_array_id(emit, file, index);
   const unsigned index2d = reg->Register.Dimension;
   const unsigned swizzleX = reg->Register.SwizzleX;
   const unsigned swizzleY = reg->Register.SwizzleY;
   const unsigned swizzleZ = reg->Register.SwizzleZ;
   const unsigned swizzleW = reg->Register.SwizzleW;
   const unsigned absolute = reg->Register.Absolute;
   const unsigned negate = reg->Register.Negate;
   bool is_prim_id = FALSE;

   VGPU10OperandToken0 operand0;
   VGPU10OperandToken1 operand1;

   if (emit->unit == PIPE_SHADER_FRAGMENT &&
       file == TGSI_FILE_INPUT) {
      if (index == emit->fs.face_input_index) {
         /* Replace INPUT[FACE] with TEMP[FACE] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.face_tmp_index;
      }
      else if (index == emit->fs.fragcoord_input_index) {
         /* Replace INPUT[POSITION] with TEMP[POSITION] */
         file = TGSI_FILE_TEMPORARY;
         index = emit->fs.fragcoord_tmp_index;
      }
      else {
         /* We remap fragment shader inputs so that FS input indexes
          * match up with VS/GS output indexes.
          */
         index = emit->linkage.input_map[index];
      }
   }
   else if (emit->unit == PIPE_SHADER_GEOMETRY &&
            file == TGSI_FILE_INPUT) {
      is_prim_id = (index == emit->gs.prim_id_index);
      index = emit->linkage.input_map[index];
   }
   else if (emit->unit == PIPE_SHADER_VERTEX) {
      if (file == TGSI_FILE_INPUT) {
         /* if input is adjusted... */
         if ((emit->key.vs.adjust_attrib_w_1 |
              emit->key.vs.adjust_attrib_itof |
              emit->key.vs.adjust_attrib_utof |
              emit->key.vs.attrib_is_bgra |
              emit->key.vs.attrib_puint_to_snorm |
              emit->key.vs.attrib_puint_to_uscaled |
              emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) {
            file = TGSI_FILE_TEMPORARY;
            index = emit->vs.adjusted_input[index];
         }
      }
      else if (file == TGSI_FILE_SYSTEM_VALUE) {
         assert(index < ARRAY_SIZE(emit->system_value_indexes));
         index = emit->system_value_indexes[index];
      }
   }

   operand0.value = operand1.value = 0;

   if (is_prim_id) {
      operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;
      operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID;
   }
   else {
      operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
      operand0.operandType = translate_register_file(file, tempArrayId > 0);
   }

   operand0 = setup_operand0_indexing(emit, operand0, file, indirect,
                                      index2d, tempArrayId);

   if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 &&
       operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) {
      /* there's no swizzle for in-line immediates */
      if (swizzleX == swizzleY &&
          swizzleX == swizzleZ &&
          swizzleX == swizzleW) {
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
      }
      else {
         operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
      }

      operand0.swizzleX = swizzleX;
      operand0.swizzleY = swizzleY;
      operand0.swizzleZ = swizzleZ;
      operand0.swizzleW = swizzleW;

      if (absolute || negate) {
         operand0.extended = 1;
         operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER;
         if (absolute && !negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS;
         if (!absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG;
         if (absolute && negate)
            operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG;
      }
   }

   /* Emit the operand tokens */
   emit_dword(emit, operand0.value);
   if (operand0.extended)
      emit_dword(emit, operand1.value);

   if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) {
      /* Emit the four float/int in-line immediate values */
      unsigned *c;
      assert(index < ARRAY_SIZE(emit->immediates));
      assert(file == TGSI_FILE_IMMEDIATE);
      assert(swizzleX < 4);
      assert(swizzleY < 4);
      assert(swizzleZ < 4);
      assert(swizzleW < 4);
      c = (unsigned *) emit->immediates[index];
      emit_dword(emit, c[swizzleX]);
      emit_dword(emit, c[swizzleY]);
      emit_dword(emit, c[swizzleZ]);
      emit_dword(emit, c[swizzleW]);
   }
   else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) {
      /* Emit the register index(es) */
      if (index2d ||
          operand0.operandType == VGPU10_OPERAND_TYPE_CONSTANT_BUFFER) {
         emit_dword(emit, reg->Dimension.Index);
      }

      if (tempArrayId > 0) {
         emit_dword(emit, tempArrayId);
      }

      emit_dword(emit, remap_temp_index(emit, file, index));

      if (indirect) {
         emit_indirect_register(emit, reg->Indirect.Index);
      }
   }
}


/**
 * Emit a resource operand (for use with a SAMPLE instruction).
 */
static void
emit_resource_register(struct svga_shader_emitter_v10 *emit,
                       unsigned resource_number)
{
   VGPU10OperandToken0 operand0;

   check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number);

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_Y;
   operand0.swizzleZ = VGPU10_COMPONENT_Z;
   operand0.swizzleW = VGPU10_COMPONENT_W;

   emit_dword(emit, operand0.value);
   emit_dword(emit, resource_number);
}


/**
 * Emit a sampler operand (for use with a SAMPLE instruction).
 */
static void
emit_sampler_register(struct svga_shader_emitter_v10 *emit,
                      unsigned sampler_number)
{
   VGPU10OperandToken0 operand0;

   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;

   emit_dword(emit, operand0.value);
   emit_dword(emit, sampler_number);
}


/**
 * Emit an operand which reads the IS_FRONT_FACING register.
 */
static void
emit_face_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand0;
   unsigned index = emit->linkage.input_map[emit->fs.face_input_index];

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_X;
   operand0.swizzleZ = VGPU10_COMPONENT_X;
   operand0.swizzleW = VGPU10_COMPONENT_X;

   emit_dword(emit, operand0.value);
   emit_dword(emit, index);
}


/**
 * Emit the token for a VGPU10 opcode.
 * \param saturate  clamp result to [0,1]?
 */
static void
emit_opcode(struct svga_shader_emitter_v10 *emit,
            unsigned vgpu10_opcode, boolean saturate)
{
   VGPU10OpcodeToken0 token0;

   token0.value = 0;            /* init all fields to zero */
   token0.opcodeType = vgpu10_opcode;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.saturate = saturate;

   emit_dword(emit, token0.value);
}


/**
 * Emit the token for a VGPU10 resinfo instruction.
 * \param modifier   return type modifier, _uint or _rcpFloat.
 *                   TODO: We may want to remove this parameter if it will
 *                   only ever be used as _uint.
 */
static void
emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit,
                    VGPU10_RESINFO_RETURN_TYPE modifier)
{
   VGPU10OpcodeToken0 token0;

   token0.value = 0;            /* init all fields to zero */
   token0.opcodeType = VGPU10_OPCODE_RESINFO;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.resinfoReturnType = modifier;

   emit_dword(emit, token0.value);
}


/**
 * Emit opcode tokens for a texture sample instruction.  Texture instructions
 * can be rather complicated (texel offsets, etc) so we have this specialized
 * function.
 */
static void
emit_sample_opcode(struct svga_shader_emitter_v10 *emit,
                   unsigned vgpu10_opcode, boolean saturate,
                   const int offsets[3])
{
   VGPU10OpcodeToken0 token0;
   VGPU10OpcodeToken1 token1;

   token0.value = 0;            /* init all fields to zero */
   token0.opcodeType = vgpu10_opcode;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.saturate = saturate;

   if (offsets[0] || offsets[1] || offsets[2]) {
      assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
      assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
      assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET);
      assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
      assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);
      assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET);

      token0.extended = 1;
      token1.value = 0;
      token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS;
      token1.offsetU = offsets[0];
      token1.offsetV = offsets[1];
      token1.offsetW = offsets[2];
   }

   emit_dword(emit, token0.value);
   if (token0.extended) {
      emit_dword(emit, token1.value);
   }
}


/**
 * Emit a DISCARD opcode token.
 * If nonzero is set, we'll discard the fragment if the X component is not 0.
 * Otherwise, we'll discard the fragment if the X component is 0.
 */
static void
emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero)
{
   VGPU10OpcodeToken0 opcode0;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DISCARD;
   if (nonzero)
      opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO;

   emit_dword(emit, opcode0.value);
}


/**
 * We need to call this before we begin emitting a VGPU10 instruction.
 */
static void
begin_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->inst_start_token == 0);
   /* Save location of the instruction's VGPU10OpcodeToken0 token.
    * Note, we can't save a pointer because it would become invalid if
    * we have to realloc the output buffer.
    */
   emit->inst_start_token = emit_get_num_tokens(emit);
}


/**
 * We need to call this after we emit the last token of a VGPU10 instruction.
 * This function patches in the opcode token's instructionLength field.
 */
static void
end_emit_instruction(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf;
   unsigned inst_length;

   assert(emit->inst_start_token > 0);

   if (emit->discard_instruction) {
      /* Back up the emit->ptr to where this instruction started so
       * that we discard the current instruction.
       */
      emit->ptr = (char *) (tokens + emit->inst_start_token);
   }
   else {
      /* Compute instruction length and patch that into the start of
       * the instruction.
       */
      inst_length = emit_get_num_tokens(emit) - emit->inst_start_token;

      assert(inst_length > 0);

      tokens[emit->inst_start_token].instructionLength = inst_length;
   }

   emit->inst_start_token = 0; /* reset to zero for error checking */
   emit->discard_instruction = FALSE;
}


/**
 * Return index for a free temporary register.
 */
static unsigned
get_temp_index(struct svga_shader_emitter_v10 *emit)
{
   assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS);
   return emit->num_shader_temps + emit->internal_temp_count++;
}


/**
 * Release the temporaries which were generated by get_temp_index().
 */
static void
free_temp_indexes(struct svga_shader_emitter_v10 *emit)
{
   emit->internal_temp_count = 0;
}


/**
 * Create a tgsi_full_src_register.
 */
static struct tgsi_full_src_register
make_src_reg(unsigned file, unsigned index)
{
   struct tgsi_full_src_register reg;

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = file;
   reg.Register.Index = index;
   reg.Register.SwizzleX = TGSI_SWIZZLE_X;
   reg.Register.SwizzleY = TGSI_SWIZZLE_Y;
   reg.Register.SwizzleZ = TGSI_SWIZZLE_Z;
   reg.Register.SwizzleW = TGSI_SWIZZLE_W;
   return reg;
}


/**
 * Create a tgsi_full_src_register for a temporary.
 */
static struct tgsi_full_src_register
make_src_temp_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_TEMPORARY, index);
}


/**
 * Create a tgsi_full_src_register for a constant.
 */
static struct tgsi_full_src_register
make_src_const_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_CONSTANT, index);
}


/**
 * Create a tgsi_full_src_register for an immediate constant.
 */
static struct tgsi_full_src_register
make_src_immediate_reg(unsigned index)
{
   return make_src_reg(TGSI_FILE_IMMEDIATE, index);
}


/**
 * Create a tgsi_full_dst_register.
 */
static struct tgsi_full_dst_register
make_dst_reg(unsigned file, unsigned index)
{
   struct tgsi_full_dst_register reg;

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = file;
   reg.Register.Index = index;
   reg.Register.WriteMask = TGSI_WRITEMASK_XYZW;
   return reg;
}


/**
 * Create a tgsi_full_dst_register for a temporary.
 */
static struct tgsi_full_dst_register
make_dst_temp_reg(unsigned index)
{
   return make_dst_reg(TGSI_FILE_TEMPORARY, index);
}


/**
 * Create a tgsi_full_dst_register for an output.
 */
static struct tgsi_full_dst_register
make_dst_output_reg(unsigned index)
{
   return make_dst_reg(TGSI_FILE_OUTPUT, index);
}


/**
 * Create negated tgsi_full_src_register.
 */
static struct tgsi_full_src_register
negate_src(const struct tgsi_full_src_register *reg)
{
   struct tgsi_full_src_register neg = *reg;
   neg.Register.Negate = !reg->Register.Negate;
   return neg;
}

/**
 * Create absolute value of a tgsi_full_src_register.
 */
static struct tgsi_full_src_register
absolute_src(const struct tgsi_full_src_register *reg)
{
   struct tgsi_full_src_register absolute = *reg;
   absolute.Register.Absolute = 1;
   return absolute;
}


/** Return the named swizzle term from the src register */
static inline unsigned
get_swizzle(const struct tgsi_full_src_register *reg, unsigned term)
{
   switch (term) {
   case TGSI_SWIZZLE_X:
      return reg->Register.SwizzleX;
   case TGSI_SWIZZLE_Y:
      return reg->Register.SwizzleY;
   case TGSI_SWIZZLE_Z:
      return reg->Register.SwizzleZ;
   case TGSI_SWIZZLE_W:
      return reg->Register.SwizzleW;
   default:
      assert(!"Bad swizzle");
      return TGSI_SWIZZLE_X;
   }
}


/**
 * Create swizzled tgsi_full_src_register.
 */
static struct tgsi_full_src_register
swizzle_src(const struct tgsi_full_src_register *reg,
            unsigned swizzleX, unsigned swizzleY,
            unsigned swizzleZ, unsigned swizzleW)
{
   struct tgsi_full_src_register swizzled = *reg;
   /* Note: we swizzle the current swizzle */
   swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX);
   swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY);
   swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ);
   swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW);
   return swizzled;
}


/**
 * Create swizzled tgsi_full_src_register where all the swizzle
 * terms are the same.
 */
static struct tgsi_full_src_register
scalar_src(const struct tgsi_full_src_register *reg, unsigned swizzle)
{
   struct tgsi_full_src_register swizzled = *reg;
   /* Note: we swizzle the current swizzle */
   swizzled.Register.SwizzleX =
   swizzled.Register.SwizzleY =
   swizzled.Register.SwizzleZ =
   swizzled.Register.SwizzleW = get_swizzle(reg, swizzle);
   return swizzled;
}


/**
 * Create new tgsi_full_dst_register with writemask.
 * \param mask  bitmask of TGSI_WRITEMASK_[XYZW]
 */
static struct tgsi_full_dst_register
writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask)
{
   struct tgsi_full_dst_register masked = *reg;
   masked.Register.WriteMask = mask;
   return masked;
}


/**
 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW.
 */
static boolean
same_swizzle_terms(const struct tgsi_full_src_register *reg)
{
   return (reg->Register.SwizzleX == reg->Register.SwizzleY &&
           reg->Register.SwizzleY == reg->Register.SwizzleZ &&
           reg->Register.SwizzleZ == reg->Register.SwizzleW);
}


/**
 * Search the vector for the value 'x' and return its position.
 */
static int
find_imm_in_vec4(const union tgsi_immediate_data vec[4],
                 union tgsi_immediate_data x)
{
   unsigned i;
   for (i = 0; i < 4; i++) {
      if (vec[i].Int == x.Int)
         return i;
   }
   return -1;
}


/**
 * Helper used by make_immediate_reg(), make_immediate_reg_4().
 */
static int
find_immediate(struct svga_shader_emitter_v10 *emit,
               union tgsi_immediate_data x, unsigned startIndex)
{
   const unsigned endIndex = emit->num_immediates;
   unsigned i;

   assert(emit->immediates_emitted);

   /* Search immediates for x, y, z, w */
   for (i = startIndex; i < endIndex; i++) {
      if (x.Int == emit->immediates[i][0].Int ||
          x.Int == emit->immediates[i][1].Int ||
          x.Int == emit->immediates[i][2].Int ||
          x.Int == emit->immediates[i][3].Int) {
         return i;
      }
   }
   /* Should never try to use an immediate value that wasn't pre-declared */
   assert(!"find_immediate() failed!");
   return -1;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal
 * union tgsi_immediate_data[4] value.
 * Note: the values must have been previously declared/allocated in
 * emit_pre_helpers().  And, all of x,y,z,w must be located in the same
 * vec4 immediate.
 */
static struct tgsi_full_src_register
make_immediate_reg_4(struct svga_shader_emitter_v10 *emit,
                     const union tgsi_immediate_data imm[4])
{
   struct tgsi_full_src_register reg;
   unsigned i;

   for (i = 0; i < emit->num_common_immediates; i++) {
      /* search for first component value */
      int immpos = find_immediate(emit, imm[0], i);
      int x, y, z, w;

      assert(immpos >= 0);

      /* find remaining components within the immediate vector */
      x = find_imm_in_vec4(emit->immediates[immpos], imm[0]);
      y = find_imm_in_vec4(emit->immediates[immpos], imm[1]);
      z = find_imm_in_vec4(emit->immediates[immpos], imm[2]);
      w = find_imm_in_vec4(emit->immediates[immpos], imm[3]);

      if (x >= 0 && y >= 0 && z >= 0 && w >= 0) {
         /* found them all */
         memset(&reg, 0, sizeof(reg));
         reg.Register.File = TGSI_FILE_IMMEDIATE;
         reg.Register.Index = immpos;
         reg.Register.SwizzleX = x;
         reg.Register.SwizzleY = y;
         reg.Register.SwizzleZ = z;
         reg.Register.SwizzleW = w;
         return reg;
      }
      /* else, keep searching */
   }

   assert(!"Failed to find immediate register!");

   /* Just return IMM[0].xxxx */
   memset(&reg, 0, sizeof(reg));
   reg.Register.File = TGSI_FILE_IMMEDIATE;
   return reg;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal
 * union tgsi_immediate_data value of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg(struct svga_shader_emitter_v10 *emit,
                   union tgsi_immediate_data value)
{
   struct tgsi_full_src_register reg;
   int immpos = find_immediate(emit, value, 0);

   assert(immpos >= 0);

   memset(&reg, 0, sizeof(reg));
   reg.Register.File = TGSI_FILE_IMMEDIATE;
   reg.Register.Index = immpos;
   reg.Register.SwizzleX =
   reg.Register.SwizzleY =
   reg.Register.SwizzleZ =
   reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value);

   return reg;
}


/**
 * Return a tgsi_full_src_register for an immediate/literal float[4] value.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit,
                          float x, float y, float z, float w)
{
   union tgsi_immediate_data imm[4];
   imm[0].Float = x;
   imm[1].Float = y;
   imm[2].Float = z;
   imm[3].Float = w;
   return make_immediate_reg_4(emit, imm);
}


/**
 * Return a tgsi_full_src_register for an immediate/literal float value
 * of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value)
{
   union tgsi_immediate_data imm;
   imm.Float = value;
   return make_immediate_reg(emit, imm);
}


/**
 * Return a tgsi_full_src_register for an immediate/literal int[4] vector.
 */
static struct tgsi_full_src_register
make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit,
                        int x, int y, int z, int w)
{
   union tgsi_immediate_data imm[4];
   imm[0].Int = x;
   imm[1].Int = y;
   imm[2].Int = z;
   imm[3].Int = w;
   return make_immediate_reg_4(emit, imm);
}


/**
 * Return a tgsi_full_src_register for an immediate/literal int value
 * of the form {value, value, value, value}.
 * \sa make_immediate_reg_4() regarding allowed values.
 */
static struct tgsi_full_src_register
make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value)
{
   union tgsi_immediate_data imm;
   imm.Int = value;
   return make_immediate_reg(emit, imm);
}


/**
 * Allocate space for a union tgsi_immediate_data[4] immediate.
 * \return  the index/position of the immediate.
 */
static unsigned
alloc_immediate_4(struct svga_shader_emitter_v10 *emit,
                  const union tgsi_immediate_data imm[4])
{
   unsigned n = emit->num_immediates++;
   assert(!emit->immediates_emitted);
   assert(n < ARRAY_SIZE(emit->immediates));
   emit->immediates[n][0] = imm[0];
   emit->immediates[n][1] = imm[1];
   emit->immediates[n][2] = imm[2];
   emit->immediates[n][3] = imm[3];
   return n;
}


/**
 * Allocate space for a float[4] immediate.
 * \return  the index/position of the immediate.
 */
static unsigned
alloc_immediate_float4(struct svga_shader_emitter_v10 *emit,
                       float x, float y, float z, float w)
{
   union tgsi_immediate_data imm[4];
   imm[0].Float = x;
   imm[1].Float = y;
   imm[2].Float = z;
   imm[3].Float = w;
   return alloc_immediate_4(emit, imm);
}


/**
 * Allocate space for an int[4] immediate.
 * \return  the index/position of the immediate.
 */
static unsigned
alloc_immediate_int4(struct svga_shader_emitter_v10 *emit,
                     int x, int y, int z, int w)
{
   union tgsi_immediate_data imm[4];
   imm[0].Int = x;
   imm[1].Int = y;
   imm[2].Int = z;
   imm[3].Int = w;
   return alloc_immediate_4(emit, imm);
}


/**
 * Allocate a shader input to store a system value.
 */
static unsigned
alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index)
{
   const unsigned n = emit->info.file_max[TGSI_FILE_INPUT] + 1 + index;
   assert(index < ARRAY_SIZE(emit->system_value_indexes));
   emit->system_value_indexes[index] = n;
   return n;
}


/**
 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10.
 */
static boolean
emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_full_immediate *imm)
{
   /* We don't actually emit any code here.  We just save the
    * immediate values and emit them later.
    */
   alloc_immediate_4(emit, imm->u);
   return TRUE;
}


/**
 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block
 * containing all the immediate values previously allocated
 * with alloc_immediate_4().
 */
static boolean
emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OpcodeToken0 token;

   assert(!emit->immediates_emitted);

   token.value = 0;
   token.opcodeType = VGPU10_OPCODE_CUSTOMDATA;
   token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER;

   /* Note: no begin/end_emit_instruction() calls */
   emit_dword(emit, token.value);
   emit_dword(emit, 2 + 4 * emit->num_immediates);
   emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates);

   emit->immediates_emitted = TRUE;

   return TRUE;
}


/**
 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10
 * interpolation mode.
 * \return  a VGPU10_INTERPOLATION_x value
 */
static unsigned
translate_interpolation(const struct svga_shader_emitter_v10 *emit,
                        unsigned interp, unsigned interpolate_loc)
{
   if (interp == TGSI_INTERPOLATE_COLOR) {
      interp = emit->key.fs.flatshade ?
         TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE;
   }

   switch (interp) {
   case TGSI_INTERPOLATE_CONSTANT:
      return VGPU10_INTERPOLATION_CONSTANT;
   case TGSI_INTERPOLATE_LINEAR:
      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
         VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID :
         VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE;
   case TGSI_INTERPOLATE_PERSPECTIVE:
      return interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID ?
         VGPU10_INTERPOLATION_LINEAR_CENTROID :
         VGPU10_INTERPOLATION_LINEAR;
   default:
      assert(!"Unexpected interpolation mode");
      return VGPU10_INTERPOLATION_CONSTANT;
   }
}


/**
 * Translate a TGSI property to VGPU10.
 * Don't emit any instructions yet; we only need to gather the primitive
 * property information here.  The output primitive topology might be changed
 * later, so the final property instructions are emitted as part of the
 * pre-helper code.
 */
1873 */ 1874 static boolean 1875 emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, 1876 const struct tgsi_full_property *prop) 1877 { 1878 static const VGPU10_PRIMITIVE primType[] = { 1879 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ 1880 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ 1881 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ 1882 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ 1883 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ 1884 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ 1885 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ 1886 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ 1887 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 1888 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ 1889 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 1890 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 1891 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 1892 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 1893 }; 1894 1895 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { 1896 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ 1897 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ 1898 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ 1899 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ 1900 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ 1901 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ 1902 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ 1903 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ 1904 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 1905 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ 1906 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 1907 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 1908 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 1909 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 1910 }; 1911 1912 static const unsigned inputArraySize[] = { 1913 0, /* VGPU10_PRIMITIVE_UNDEFINED */ 1914 1, /* VGPU10_PRIMITIVE_POINT */ 1915 2, /* VGPU10_PRIMITIVE_LINE */ 1916 3, /* VGPU10_PRIMITIVE_TRIANGLE */ 1917 0, 1918 0, 1919 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ 1920 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ 1921 }; 1922 1923 switch (prop->Property.PropertyName) { 1924 case TGSI_PROPERTY_GS_INPUT_PRIM: 1925 assert(prop->u[0].Data < ARRAY_SIZE(primType)); 1926 emit->gs.prim_type = primType[prop->u[0].Data]; 1927 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); 1928 emit->gs.input_size = inputArraySize[emit->gs.prim_type]; 1929 break; 1930 1931 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 1932 assert(prop->u[0].Data < ARRAY_SIZE(primTopology)); 1933 emit->gs.prim_topology = primTopology[prop->u[0].Data]; 1934 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); 1935 break; 1936 1937 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 1938 emit->gs.max_out_vertices = prop->u[0].Data; 1939 break; 1940 1941 default: 1942 break; 1943 } 1944 1945 return TRUE; 1946 } 1947 1948 1949 static void 1950 emit_property_instruction(struct svga_shader_emitter_v10 *emit, 1951 VGPU10OpcodeToken0 opcode0, unsigned nData, 1952 unsigned data) 1953 { 1954 begin_emit_instruction(emit); 1955 emit_dword(emit, opcode0.value); 1956 if (nData) 1957 emit_dword(emit, data); 1958 end_emit_instruction(emit); 1959 } 
1960 1961 1962 /** 1963 * Emit property instructions 1964 */ 1965 static void 1966 emit_property_instructions(struct svga_shader_emitter_v10 *emit) 1967 { 1968 VGPU10OpcodeToken0 opcode0; 1969 1970 assert(emit->unit == PIPE_SHADER_GEOMETRY); 1971 1972 /* emit input primitive type declaration */ 1973 opcode0.value = 0; 1974 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE; 1975 opcode0.primitive = emit->gs.prim_type; 1976 emit_property_instruction(emit, opcode0, 0, 0); 1977 1978 /* emit output primitive topology declaration */ 1979 opcode0.value = 0; 1980 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; 1981 opcode0.primitiveTopology = emit->gs.prim_topology; 1982 emit_property_instruction(emit, opcode0, 0, 0); 1983 1984 /* emit max output vertices */ 1985 opcode0.value = 0; 1986 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; 1987 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); 1988 } 1989 1990 1991 /** 1992 * Emit a vgpu10 declaration "instruction". 1993 * \param index the register index 1994 * \param size array size of the operand. In most cases, it is 1, 1995 * but for inputs to geometry shader, the array size varies 1996 * depending on the primitive type. 1997 */ 1998 static void 1999 emit_decl_instruction(struct svga_shader_emitter_v10 *emit, 2000 VGPU10OpcodeToken0 opcode0, 2001 VGPU10OperandToken0 operand0, 2002 VGPU10NameToken name_token, 2003 unsigned index, unsigned size) 2004 { 2005 assert(opcode0.opcodeType); 2006 assert(operand0.mask); 2007 2008 begin_emit_instruction(emit); 2009 emit_dword(emit, opcode0.value); 2010 2011 emit_dword(emit, operand0.value); 2012 2013 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { 2014 /* Next token is the index of the register to declare */ 2015 emit_dword(emit, index); 2016 } 2017 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { 2018 /* Next token is the size of the register */ 2019 emit_dword(emit, size); 2020 2021 /* Followed by the index of the register */ 2022 emit_dword(emit, index); 2023 } 2024 2025 if (name_token.value) { 2026 emit_dword(emit, name_token.value); 2027 } 2028 2029 end_emit_instruction(emit); 2030 } 2031 2032 2033 /** 2034 * Emit the declaration for a shader input. 2035 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx 2036 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x 2037 * \param dim index dimension 2038 * \param index the input register index 2039 * \param size array size of the operand. In most cases, it is 1, 2040 * but for inputs to geometry shader, the array size varies 2041 * depending on the primitive type. 
2042 * \param name one of VGPU10_NAME_x 2043 * \param numComp number of components 2044 * \param selMode component selection mode 2045 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values 2046 * \param interpMode interpolation mode 2047 */ 2048 static void 2049 emit_input_declaration(struct svga_shader_emitter_v10 *emit, 2050 unsigned opcodeType, unsigned operandType, 2051 unsigned dim, unsigned index, unsigned size, 2052 unsigned name, unsigned numComp, 2053 unsigned selMode, unsigned usageMask, 2054 unsigned interpMode) 2055 { 2056 VGPU10OpcodeToken0 opcode0; 2057 VGPU10OperandToken0 operand0; 2058 VGPU10NameToken name_token; 2059 2060 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2061 assert(opcodeType == VGPU10_OPCODE_DCL_INPUT || 2062 opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV || 2063 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS || 2064 opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV); 2065 assert(operandType == VGPU10_OPERAND_TYPE_INPUT || 2066 operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID); 2067 assert(numComp <= VGPU10_OPERAND_4_COMPONENT); 2068 assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE); 2069 assert(dim <= VGPU10_OPERAND_INDEX_3D); 2070 assert(name == VGPU10_NAME_UNDEFINED || 2071 name == VGPU10_NAME_POSITION || 2072 name == VGPU10_NAME_INSTANCE_ID || 2073 name == VGPU10_NAME_VERTEX_ID || 2074 name == VGPU10_NAME_PRIMITIVE_ID || 2075 name == VGPU10_NAME_IS_FRONT_FACE); 2076 assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED || 2077 interpMode == VGPU10_INTERPOLATION_CONSTANT || 2078 interpMode == VGPU10_INTERPOLATION_LINEAR || 2079 interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID || 2080 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE || 2081 interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID); 2082 2083 check_register_index(emit, opcodeType, index); 2084 2085 opcode0.value = operand0.value = name_token.value = 0; 2086 2087 opcode0.opcodeType = opcodeType; 2088 opcode0.interpolationMode = interpMode; 2089 2090 operand0.operandType = operandType; 2091 operand0.numComponents = numComp; 2092 operand0.selectionMode = selMode; 2093 operand0.mask = usageMask; 2094 operand0.indexDimension = dim; 2095 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2096 if (dim == VGPU10_OPERAND_INDEX_2D) 2097 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2098 2099 name_token.name = name; 2100 2101 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size); 2102 } 2103 2104 2105 /** 2106 * Emit the declaration for a shader output.
2107 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx 2108 * \param index the output register index 2109 * \param name one of VGPU10_NAME_x 2110 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values 2111 */ 2112 static void 2113 emit_output_declaration(struct svga_shader_emitter_v10 *emit, 2114 unsigned type, unsigned index, 2115 unsigned name, unsigned usageMask) 2116 { 2117 VGPU10OpcodeToken0 opcode0; 2118 VGPU10OperandToken0 operand0; 2119 VGPU10NameToken name_token; 2120 2121 assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2122 assert(type == VGPU10_OPCODE_DCL_OUTPUT || 2123 type == VGPU10_OPCODE_DCL_OUTPUT_SGV || 2124 type == VGPU10_OPCODE_DCL_OUTPUT_SIV); 2125 assert(name == VGPU10_NAME_UNDEFINED || 2126 name == VGPU10_NAME_POSITION || 2127 name == VGPU10_NAME_PRIMITIVE_ID || 2128 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || 2129 name == VGPU10_NAME_CLIP_DISTANCE); 2130 2131 check_register_index(emit, type, index); 2132 2133 opcode0.value = operand0.value = name_token.value = 0; 2134 2135 opcode0.opcodeType = type; 2136 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 2137 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2138 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 2139 operand0.mask = usageMask; 2140 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2141 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2142 2143 name_token.name = name; 2144 2145 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); 2146 } 2147 2148 2149 /** 2150 * Emit the declaration for the fragment depth output. 2151 */ 2152 static void 2153 emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) 2154 { 2155 VGPU10OpcodeToken0 opcode0; 2156 VGPU10OperandToken0 operand0; 2157 VGPU10NameToken name_token; 2158 2159 assert(emit->unit == PIPE_SHADER_FRAGMENT); 2160 2161 opcode0.value = operand0.value = name_token.value = 0; 2162 2163 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; 2164 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 2165 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 2166 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 2167 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 2168 2169 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); 2170 } 2171 2172 2173 /** 2174 * Emit the declaration for a system value input/output. 2175 */ 2176 static void 2177 emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, 2178 unsigned semantic_name, unsigned index) 2179 { 2180 switch (semantic_name) { 2181 case TGSI_SEMANTIC_INSTANCEID: 2182 index = alloc_system_value_index(emit, index); 2183 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2184 VGPU10_OPERAND_TYPE_INPUT, 2185 VGPU10_OPERAND_INDEX_1D, 2186 index, 1, 2187 VGPU10_NAME_INSTANCE_ID, 2188 VGPU10_OPERAND_4_COMPONENT, 2189 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2190 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2191 VGPU10_INTERPOLATION_UNDEFINED); 2192 break; 2193 case TGSI_SEMANTIC_VERTEXID: 2194 index = alloc_system_value_index(emit, index); 2195 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 2196 VGPU10_OPERAND_TYPE_INPUT, 2197 VGPU10_OPERAND_INDEX_1D, 2198 index, 1, 2199 VGPU10_NAME_VERTEX_ID, 2200 VGPU10_OPERAND_4_COMPONENT, 2201 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2202 VGPU10_OPERAND_4_COMPONENT_MASK_X, 2203 VGPU10_INTERPOLATION_UNDEFINED); 2204 break; 2205 default: 2206 ; /* XXX */ 2207 } 2208 } 2209 2210 /** 2211 * Translate a TGSI declaration to VGPU10. 
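 *
 * Note that most register files are only recorded here (temps, constants,
 * samplers, addresses, etc.); the corresponding VGPU10 declarations are
 * emitted later by emit_input_declarations(), emit_output_declarations(),
 * emit_temporaries_declaration(), emit_constant_declaration(),
 * emit_sampler_declarations() and emit_resource_declarations().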
2212 */ 2213 static boolean 2214 emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, 2215 const struct tgsi_full_declaration *decl) 2216 { 2217 switch (decl->Declaration.File) { 2218 case TGSI_FILE_INPUT: 2219 /* do nothing - see emit_input_declarations() */ 2220 return TRUE; 2221 2222 case TGSI_FILE_OUTPUT: 2223 assert(decl->Range.First == decl->Range.Last); 2224 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; 2225 return TRUE; 2226 2227 case TGSI_FILE_TEMPORARY: 2228 /* Don't declare the temps here. Just keep track of how many 2229 * and emit the declaration later. 2230 */ 2231 if (decl->Declaration.Array) { 2232 /* Indexed temporary array. Save the start index of the array 2233 * and the size of the array. 2234 */ 2235 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); 2236 unsigned i; 2237 2238 assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); 2239 2240 /* Save this array so we can emit the declaration for it later */ 2241 emit->temp_arrays[arrayID].start = decl->Range.First; 2242 emit->temp_arrays[arrayID].size = 2243 decl->Range.Last - decl->Range.First + 1; 2244 2245 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); 2246 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); 2247 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); 2248 2249 /* Fill in the temp_map entries for this array */ 2250 for (i = decl->Range.First; i <= decl->Range.Last; i++) { 2251 emit->temp_map[i].arrayId = arrayID; 2252 emit->temp_map[i].index = i - decl->Range.First; 2253 } 2254 } 2255 2256 /* for all temps, indexed or not, keep track of highest index */ 2257 emit->num_shader_temps = MAX2(emit->num_shader_temps, 2258 decl->Range.Last + 1); 2259 return TRUE; 2260 2261 case TGSI_FILE_CONSTANT: 2262 /* Don't declare constants here. Just keep track and emit later. */ 2263 { 2264 unsigned constbuf = 0, num_consts; 2265 if (decl->Declaration.Dimension) { 2266 constbuf = decl->Dim.Index2D; 2267 } 2268 /* We throw an assertion here when, in fact, the shader should never 2269 * have linked due to constbuf index out of bounds, so we shouldn't 2270 * have reached here. 2271 */ 2272 assert(constbuf < ARRAY_SIZE(emit->num_shader_consts)); 2273 2274 num_consts = MAX2(emit->num_shader_consts[constbuf], 2275 decl->Range.Last + 1); 2276 2277 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 2278 debug_printf("Warning: constant buffer is declared to size [%u]" 2279 " but [%u] is the limit.\n", 2280 num_consts, 2281 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2282 } 2283 /* The linker doesn't enforce the max UBO size so we clamp here */ 2284 emit->num_shader_consts[constbuf] = 2285 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 2286 } 2287 return TRUE; 2288 2289 case TGSI_FILE_IMMEDIATE: 2290 assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); 2291 return FALSE; 2292 2293 case TGSI_FILE_SYSTEM_VALUE: 2294 emit_system_value_declaration(emit, decl->Semantic.Name, 2295 decl->Range.First); 2296 return TRUE; 2297 2298 case TGSI_FILE_SAMPLER: 2299 /* Don't declare samplers here. Just keep track and emit later. 
*/ 2300 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); 2301 return TRUE; 2302 2303 #if 0 2304 case TGSI_FILE_RESOURCE: 2305 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ 2306 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ 2307 assert(!"TGSI_FILE_RESOURCE not handled yet"); 2308 return FALSE; 2309 #endif 2310 2311 case TGSI_FILE_ADDRESS: 2312 emit->num_address_regs = MAX2(emit->num_address_regs, 2313 decl->Range.Last + 1); 2314 return TRUE; 2315 2316 case TGSI_FILE_SAMPLER_VIEW: 2317 { 2318 unsigned unit = decl->Range.First; 2319 assert(decl->Range.First == decl->Range.Last); 2320 emit->sampler_target[unit] = decl->SamplerView.Resource; 2321 /* Note: we can ignore YZW return types for now */ 2322 emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; 2323 } 2324 return TRUE; 2325 2326 default: 2327 assert(!"Unexpected type of declaration"); 2328 return FALSE; 2329 } 2330 } 2331 2332 2333 2334 /** 2335 * Emit all input declarations. 2336 */ 2337 static boolean 2338 emit_input_declarations(struct svga_shader_emitter_v10 *emit) 2339 { 2340 unsigned i; 2341 2342 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2343 2344 for (i = 0; i < emit->linkage.num_inputs; i++) { 2345 unsigned semantic_name = emit->info.input_semantic_name[i]; 2346 unsigned usage_mask = emit->info.input_usage_mask[i]; 2347 unsigned index = emit->linkage.input_map[i]; 2348 unsigned type, interpolationMode, name; 2349 2350 if (usage_mask == 0) 2351 continue; /* register is not actually used */ 2352 2353 if (semantic_name == TGSI_SEMANTIC_POSITION) { 2354 /* fragment position input */ 2355 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2356 interpolationMode = VGPU10_INTERPOLATION_LINEAR; 2357 name = VGPU10_NAME_POSITION; 2358 if (usage_mask & TGSI_WRITEMASK_W) { 2359 /* we need to replace use of 'w' with '1/w' */ 2360 emit->fs.fragcoord_input_index = i; 2361 } 2362 } 2363 else if (semantic_name == TGSI_SEMANTIC_FACE) { 2364 /* fragment front-facing input */ 2365 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2366 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 2367 name = VGPU10_NAME_IS_FRONT_FACE; 2368 emit->fs.face_input_index = i; 2369 } 2370 else if (semantic_name == TGSI_SEMANTIC_PRIMID) { 2371 /* primitive ID */ 2372 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 2373 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 2374 name = VGPU10_NAME_PRIMITIVE_ID; 2375 } 2376 else { 2377 /* general fragment input */ 2378 type = VGPU10_OPCODE_DCL_INPUT_PS; 2379 interpolationMode = 2380 translate_interpolation(emit, 2381 emit->info.input_interpolate[i], 2382 emit->info.input_interpolate_loc[i]); 2383 2384 /* keeps track if flat interpolation mode is being used */ 2385 emit->uses_flat_interp = emit->uses_flat_interp || 2386 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); 2387 2388 name = VGPU10_NAME_UNDEFINED; 2389 } 2390 2391 emit_input_declaration(emit, type, 2392 VGPU10_OPERAND_TYPE_INPUT, 2393 VGPU10_OPERAND_INDEX_1D, index, 1, 2394 name, 2395 VGPU10_OPERAND_4_COMPONENT, 2396 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2397 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2398 interpolationMode); 2399 } 2400 } 2401 else if (emit->unit == PIPE_SHADER_GEOMETRY) { 2402 2403 for (i = 0; i < emit->info.num_inputs; i++) { 2404 unsigned semantic_name = emit->info.input_semantic_name[i]; 2405 unsigned usage_mask = emit->info.input_usage_mask[i]; 2406 unsigned index = emit->linkage.input_map[i]; 2407 unsigned opcodeType, operandType; 2408 unsigned numComp, selMode; 2409 unsigned name; 2410 unsigned dim; 2411 2412 if (usage_mask == 0) 2413 
continue; /* register is not actually used */ 2414 2415 opcodeType = VGPU10_OPCODE_DCL_INPUT; 2416 operandType = VGPU10_OPERAND_TYPE_INPUT; 2417 numComp = VGPU10_OPERAND_4_COMPONENT; 2418 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 2419 name = VGPU10_NAME_UNDEFINED; 2420 2421 /* all geometry shader inputs are two dimensional except gl_PrimitiveID */ 2422 dim = VGPU10_OPERAND_INDEX_2D; 2423 2424 if (semantic_name == TGSI_SEMANTIC_PRIMID) { 2425 /* Primitive ID */ 2426 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 2427 dim = VGPU10_OPERAND_INDEX_0D; 2428 numComp = VGPU10_OPERAND_0_COMPONENT; 2429 selMode = 0; 2430 2431 /* also save the register index so we can check for 2432 * primitive id when emit src register. We need to modify the 2433 * operand type, index dimension when emit primitive id src reg. 2434 */ 2435 emit->gs.prim_id_index = i; 2436 } 2437 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 2438 /* vertex position input */ 2439 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; 2440 name = VGPU10_NAME_POSITION; 2441 } 2442 2443 emit_input_declaration(emit, opcodeType, operandType, 2444 dim, index, 2445 emit->gs.input_size, 2446 name, 2447 numComp, selMode, 2448 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2449 VGPU10_INTERPOLATION_UNDEFINED); 2450 } 2451 } 2452 else { 2453 assert(emit->unit == PIPE_SHADER_VERTEX); 2454 2455 for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { 2456 unsigned usage_mask = emit->info.input_usage_mask[i]; 2457 unsigned index = i; 2458 2459 if (usage_mask == 0) 2460 continue; /* register is not actually used */ 2461 2462 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 2463 VGPU10_OPERAND_TYPE_INPUT, 2464 VGPU10_OPERAND_INDEX_1D, index, 1, 2465 VGPU10_NAME_UNDEFINED, 2466 VGPU10_OPERAND_4_COMPONENT, 2467 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 2468 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 2469 VGPU10_INTERPOLATION_UNDEFINED); 2470 } 2471 } 2472 2473 return TRUE; 2474 } 2475 2476 2477 /** 2478 * Emit all output declarations. 2479 */ 2480 static boolean 2481 emit_output_declarations(struct svga_shader_emitter_v10 *emit) 2482 { 2483 unsigned i; 2484 2485 for (i = 0; i < emit->info.num_outputs; i++) { 2486 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ 2487 const unsigned semantic_name = emit->info.output_semantic_name[i]; 2488 const unsigned semantic_index = emit->info.output_semantic_index[i]; 2489 unsigned index = i; 2490 2491 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2492 if (semantic_name == TGSI_SEMANTIC_COLOR) { 2493 assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index)); 2494 2495 emit->fs.color_out_index[semantic_index] = index; 2496 2497 /* The semantic index is the shader's color output/buffer index */ 2498 emit_output_declaration(emit, 2499 VGPU10_OPCODE_DCL_OUTPUT, semantic_index, 2500 VGPU10_NAME_UNDEFINED, 2501 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2502 2503 if (semantic_index == 0) { 2504 if (emit->key.fs.write_color0_to_n_cbufs > 1) { 2505 /* Emit declarations for the additional color outputs 2506 * for broadcasting. 
2507 */ 2508 unsigned j; 2509 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { 2510 /* Allocate a new output index */ 2511 unsigned idx = emit->info.num_outputs + j - 1; 2512 emit->fs.color_out_index[j] = idx; 2513 emit_output_declaration(emit, 2514 VGPU10_OPCODE_DCL_OUTPUT, idx, 2515 VGPU10_NAME_UNDEFINED, 2516 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2517 emit->info.output_semantic_index[idx] = j; 2518 } 2519 } 2520 } 2521 else { 2522 assert(!emit->key.fs.write_color0_to_n_cbufs); 2523 } 2524 } 2525 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 2526 /* Fragment depth output */ 2527 emit_fragdepth_output_declaration(emit); 2528 } 2529 else { 2530 assert(!"Bad output semantic name"); 2531 } 2532 } 2533 else { 2534 /* VS or GS */ 2535 unsigned name, type; 2536 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 2537 2538 switch (semantic_name) { 2539 case TGSI_SEMANTIC_POSITION: 2540 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2541 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 2542 name = VGPU10_NAME_POSITION; 2543 /* Save the index of the vertex position output register */ 2544 emit->vposition.out_index = index; 2545 break; 2546 case TGSI_SEMANTIC_CLIPDIST: 2547 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 2548 name = VGPU10_NAME_CLIP_DISTANCE; 2549 /* save the starting index of the clip distance output register */ 2550 if (semantic_index == 0) 2551 emit->clip_dist_out_index = index; 2552 writemask = emit->output_usage_mask[index]; 2553 writemask = apply_clip_plane_mask(emit, writemask, semantic_index); 2554 if (writemask == 0x0) { 2555 continue; /* discard this do-nothing declaration */ 2556 } 2557 break; 2558 case TGSI_SEMANTIC_PRIMID: 2559 assert(emit->unit == PIPE_SHADER_GEOMETRY); 2560 type = VGPU10_OPCODE_DCL_OUTPUT_SGV; 2561 name = VGPU10_NAME_PRIMITIVE_ID; 2562 break; 2563 case TGSI_SEMANTIC_LAYER: 2564 assert(emit->unit == PIPE_SHADER_GEOMETRY); 2565 type = VGPU10_OPCODE_DCL_OUTPUT_SGV; 2566 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; 2567 break; 2568 case TGSI_SEMANTIC_CLIPVERTEX: 2569 type = VGPU10_OPCODE_DCL_OUTPUT; 2570 name = VGPU10_NAME_UNDEFINED; 2571 emit->clip_vertex_out_index = index; 2572 break; 2573 default: 2574 /* generic output */ 2575 type = VGPU10_OPCODE_DCL_OUTPUT; 2576 name = VGPU10_NAME_UNDEFINED; 2577 } 2578 2579 emit_output_declaration(emit, type, index, name, writemask); 2580 } 2581 } 2582 2583 if (emit->vposition.so_index != INVALID_INDEX && 2584 emit->vposition.out_index != INVALID_INDEX) { 2585 2586 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2587 2588 /* Emit the declaration for the non-adjusted vertex position 2589 * for stream output purpose 2590 */ 2591 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2592 emit->vposition.so_index, 2593 VGPU10_NAME_UNDEFINED, 2594 VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 2595 } 2596 2597 if (emit->clip_dist_so_index != INVALID_INDEX && 2598 emit->clip_dist_out_index != INVALID_INDEX) { 2599 2600 assert(emit->unit != PIPE_SHADER_FRAGMENT); 2601 2602 /* Emit the declaration for the clip distance shadow copy which 2603 * will be used for stream output purpose and for clip distance 2604 * varying variable 2605 */ 2606 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2607 emit->clip_dist_so_index, 2608 VGPU10_NAME_UNDEFINED, 2609 emit->output_usage_mask[emit->clip_dist_out_index]); 2610 2611 if (emit->info.num_written_clipdistance > 4) { 2612 /* for the second clip distance register, each handles 4 planes */ 2613 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 2614 emit->clip_dist_so_index + 1, 
2615 VGPU10_NAME_UNDEFINED, 2616 emit->output_usage_mask[emit->clip_dist_out_index+1]); 2617 } 2618 } 2619 2620 return TRUE; 2621 } 2622 2623 2624 /** 2625 * Emit the declaration for the temporary registers. 2626 */ 2627 static boolean 2628 emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) 2629 { 2630 unsigned total_temps, reg, i; 2631 2632 total_temps = emit->num_shader_temps; 2633 2634 /* If there is indirect access to non-indexable temps in the shader, 2635 * convert those temps to indexable temps. This works around a bug 2636 * in the GLSL->TGSI translator exposed in piglit test 2637 * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test. 2638 * Internal temps added by the driver remain as non-indexable temps. 2639 */ 2640 if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) && 2641 emit->num_temp_arrays == 0) { 2642 unsigned arrayID; 2643 2644 arrayID = 1; 2645 emit->num_temp_arrays = arrayID + 1; 2646 emit->temp_arrays[arrayID].start = 0; 2647 emit->temp_arrays[arrayID].size = total_temps; 2648 2649 /* Fill in the temp_map entries for this temp array */ 2650 for (i = 0; i < total_temps; i++) { 2651 emit->temp_map[i].arrayId = arrayID; 2652 emit->temp_map[i].index = i; 2653 } 2654 } 2655 2656 /* Allocate extra temps for specially-implemented instructions, 2657 * such as LIT. 2658 */ 2659 total_temps += MAX_INTERNAL_TEMPS; 2660 2661 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { 2662 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || 2663 emit->key.clip_plane_enable || 2664 emit->vposition.so_index != INVALID_INDEX) { 2665 emit->vposition.tmp_index = total_temps; 2666 total_temps += 1; 2667 } 2668 2669 if (emit->unit == PIPE_SHADER_VERTEX) { 2670 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | 2671 emit->key.vs.adjust_attrib_itof | 2672 emit->key.vs.adjust_attrib_utof | 2673 emit->key.vs.attrib_is_bgra | 2674 emit->key.vs.attrib_puint_to_snorm | 2675 emit->key.vs.attrib_puint_to_uscaled | 2676 emit->key.vs.attrib_puint_to_sscaled); 2677 while (attrib_mask) { 2678 unsigned index = u_bit_scan(&attrib_mask); 2679 emit->vs.adjusted_input[index] = total_temps++; 2680 } 2681 } 2682 2683 if (emit->clip_mode == CLIP_DISTANCE) { 2684 /* We need to write the clip distance to a temporary register 2685 * first. Then it will be copied to the shadow copy for 2686 * the clip distance varying variable and stream output purpose. 2687 * It will also be copied to the actual CLIPDIST register 2688 * according to the enabled clip planes 2689 */ 2690 emit->clip_dist_tmp_index = total_temps++; 2691 if (emit->info.num_written_clipdistance > 4) 2692 total_temps++; /* second clip register */ 2693 } 2694 else if (emit->clip_mode == CLIP_VERTEX) { 2695 /* We need to convert the TGSI CLIPVERTEX output to one or more 2696 * clip distances. Allocate a temp reg for the clipvertex here. 
2697 */ 2698 assert(emit->info.writes_clipvertex > 0); 2699 emit->clip_vertex_tmp_index = total_temps; 2700 total_temps++; 2701 } 2702 } 2703 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 2704 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS || 2705 emit->key.fs.white_fragments || 2706 emit->key.fs.write_color0_to_n_cbufs > 1) { 2707 /* Allocate a temp to hold the output color */ 2708 emit->fs.color_tmp_index = total_temps; 2709 total_temps += 1; 2710 } 2711 2712 if (emit->fs.face_input_index != INVALID_INDEX) { 2713 /* Allocate a temp for the +/-1 face register */ 2714 emit->fs.face_tmp_index = total_temps; 2715 total_temps += 1; 2716 } 2717 2718 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 2719 /* Allocate a temp for modified fragment position register */ 2720 emit->fs.fragcoord_tmp_index = total_temps; 2721 total_temps += 1; 2722 } 2723 } 2724 2725 for (i = 0; i < emit->num_address_regs; i++) { 2726 emit->address_reg_index[i] = total_temps++; 2727 } 2728 2729 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10 2730 * temp indexes. Basically, we compact all the non-array temp register 2731 * indexes into a consecutive series. 2732 * 2733 * Before, we may have some TGSI declarations like: 2734 * DCL TEMP[0..1], LOCAL 2735 * DCL TEMP[2..4], ARRAY(1), LOCAL 2736 * DCL TEMP[5..7], ARRAY(2), LOCAL 2737 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things 2738 * 2739 * After, we'll have a map like this: 2740 * temp_map[0] = { array 0, index 0 } 2741 * temp_map[1] = { array 0, index 1 } 2742 * temp_map[2] = { array 1, index 0 } 2743 * temp_map[3] = { array 1, index 1 } 2744 * temp_map[4] = { array 1, index 2 } 2745 * temp_map[5] = { array 2, index 0 } 2746 * temp_map[6] = { array 2, index 1 } 2747 * temp_map[7] = { array 2, index 2 } 2748 * temp_map[8] = { array 0, index 2 } 2749 * temp_map[9] = { array 0, index 3 } 2750 * 2751 * We'll declare two arrays of 3 elements, plus a set of four non-indexed 2752 * temps numbered 0..3 2753 * 2754 * Any time we emit a temporary register index, we'll have to use the 2755 * temp_map[] table to convert the TGSI index to the VGPU10 index. 2756 * 2757 * Finally, we recompute the total_temps value here. 2758 */ 2759 reg = 0; 2760 for (i = 0; i < total_temps; i++) { 2761 if (emit->temp_map[i].arrayId == 0) { 2762 emit->temp_map[i].index = reg++; 2763 } 2764 } 2765 2766 if (0) { 2767 debug_printf("total_temps %u\n", total_temps); 2768 for (i = 0; i < total_temps; i++) { 2769 debug_printf("temp %u -> array %u index %u\n", 2770 i, emit->temp_map[i].arrayId, emit->temp_map[i].index); 2771 } 2772 } 2773 2774 total_temps = reg; 2775 2776 /* Emit declaration of ordinary temp registers */ 2777 if (total_temps > 0) { 2778 VGPU10OpcodeToken0 opcode0; 2779 2780 opcode0.value = 0; 2781 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS; 2782 2783 begin_emit_instruction(emit); 2784 emit_dword(emit, opcode0.value); 2785 emit_dword(emit, total_temps); 2786 end_emit_instruction(emit); 2787 } 2788 2789 /* Emit declarations for indexable temp arrays. Skip 0th entry since 2790 * it's unused. 
2791 */ 2792 for (i = 1; i < emit->num_temp_arrays; i++) { 2793 unsigned num_temps = emit->temp_arrays[i].size; 2794 2795 if (num_temps > 0) { 2796 VGPU10OpcodeToken0 opcode0; 2797 2798 opcode0.value = 0; 2799 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP; 2800 2801 begin_emit_instruction(emit); 2802 emit_dword(emit, opcode0.value); 2803 emit_dword(emit, i); /* which array */ 2804 emit_dword(emit, num_temps); 2805 emit_dword(emit, 4); /* num components */ 2806 end_emit_instruction(emit); 2807 2808 total_temps += num_temps; 2809 } 2810 } 2811 2812 /* Check that the grand total of all regular and indexed temps is 2813 * under the limit. 2814 */ 2815 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); 2816 2817 return TRUE; 2818 } 2819 2820 2821 static boolean 2822 emit_constant_declaration(struct svga_shader_emitter_v10 *emit) 2823 { 2824 VGPU10OpcodeToken0 opcode0; 2825 VGPU10OperandToken0 operand0; 2826 unsigned total_consts, i; 2827 2828 opcode0.value = 0; 2829 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER; 2830 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED; 2831 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */ 2832 2833 operand0.value = 0; 2834 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 2835 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D; 2836 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2837 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2838 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 2839 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 2840 operand0.swizzleX = 0; 2841 operand0.swizzleY = 1; 2842 operand0.swizzleZ = 2; 2843 operand0.swizzleW = 3; 2844 2845 /** 2846 * Emit declaration for constant buffer [0]. We also allocate 2847 * room for the extra constants here. 
2848 */ 2849 total_consts = emit->num_shader_consts[0]; 2850 2851 /* Now, allocate constant slots for the "extra" constants */ 2852 2853 /* Vertex position scale/translation */ 2854 if (emit->vposition.need_prescale) { 2855 emit->vposition.prescale_scale_index = total_consts++; 2856 emit->vposition.prescale_trans_index = total_consts++; 2857 } 2858 2859 if (emit->unit == PIPE_SHADER_VERTEX) { 2860 if (emit->key.vs.undo_viewport) { 2861 emit->vs.viewport_index = total_consts++; 2862 } 2863 } 2864 2865 /* user-defined clip planes */ 2866 if (emit->key.clip_plane_enable) { 2867 unsigned n = util_bitcount(emit->key.clip_plane_enable); 2868 assert(emit->unit == PIPE_SHADER_VERTEX || 2869 emit->unit == PIPE_SHADER_GEOMETRY); 2870 for (i = 0; i < n; i++) { 2871 emit->clip_plane_const[i] = total_consts++; 2872 } 2873 } 2874 2875 /* Texcoord scale factors for RECT textures */ 2876 { 2877 for (i = 0; i < emit->num_samplers; i++) { 2878 if (emit->key.tex[i].unnormalized) { 2879 emit->texcoord_scale_index[i] = total_consts++; 2880 } 2881 } 2882 } 2883 2884 /* Texture buffer sizes */ 2885 for (i = 0; i < emit->num_samplers; i++) { 2886 if (emit->sampler_target[i] == TGSI_TEXTURE_BUFFER) { 2887 emit->texture_buffer_size_index[i] = total_consts++; 2888 } 2889 } 2890 2891 if (total_consts > 0) { 2892 begin_emit_instruction(emit); 2893 emit_dword(emit, opcode0.value); 2894 emit_dword(emit, operand0.value); 2895 emit_dword(emit, 0); /* which const buffer slot */ 2896 emit_dword(emit, total_consts); 2897 end_emit_instruction(emit); 2898 } 2899 2900 /* Declare remaining constant buffers (UBOs) */ 2901 for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) { 2902 if (emit->num_shader_consts[i] > 0) { 2903 begin_emit_instruction(emit); 2904 emit_dword(emit, opcode0.value); 2905 emit_dword(emit, operand0.value); 2906 emit_dword(emit, i); /* which const buffer slot */ 2907 emit_dword(emit, emit->num_shader_consts[i]); 2908 end_emit_instruction(emit); 2909 } 2910 } 2911 2912 return TRUE; 2913 } 2914 2915 2916 /** 2917 * Emit declarations for samplers. 2918 */ 2919 static boolean 2920 emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) 2921 { 2922 unsigned i; 2923 2924 for (i = 0; i < emit->num_samplers; i++) { 2925 VGPU10OpcodeToken0 opcode0; 2926 VGPU10OperandToken0 operand0; 2927 2928 opcode0.value = 0; 2929 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; 2930 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; 2931 2932 operand0.value = 0; 2933 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 2934 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 2935 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 2936 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 2937 2938 begin_emit_instruction(emit); 2939 emit_dword(emit, opcode0.value); 2940 emit_dword(emit, operand0.value); 2941 emit_dword(emit, i); 2942 end_emit_instruction(emit); 2943 } 2944 2945 return TRUE; 2946 } 2947 2948 2949 /** 2950 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x.
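 * Shadow targets map to the same resource dimension as their non-shadow
 * counterparts, and the 1D/2D array targets fall back to the corresponding
 * non-array dimension when is_array is false.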
2951 */ 2952 static unsigned 2953 tgsi_texture_to_resource_dimension(unsigned target, boolean is_array) 2954 { 2955 switch (target) { 2956 case TGSI_TEXTURE_BUFFER: 2957 return VGPU10_RESOURCE_DIMENSION_BUFFER; 2958 case TGSI_TEXTURE_1D: 2959 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 2960 case TGSI_TEXTURE_2D: 2961 case TGSI_TEXTURE_RECT: 2962 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2963 case TGSI_TEXTURE_3D: 2964 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; 2965 case TGSI_TEXTURE_CUBE: 2966 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 2967 case TGSI_TEXTURE_SHADOW1D: 2968 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 2969 case TGSI_TEXTURE_SHADOW2D: 2970 case TGSI_TEXTURE_SHADOWRECT: 2971 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2972 case TGSI_TEXTURE_1D_ARRAY: 2973 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2974 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY 2975 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 2976 case TGSI_TEXTURE_2D_ARRAY: 2977 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2978 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY 2979 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2980 case TGSI_TEXTURE_SHADOWCUBE: 2981 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 2982 case TGSI_TEXTURE_2D_MSAA: 2983 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; 2984 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2985 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY 2986 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; 2987 case TGSI_TEXTURE_CUBE_ARRAY: 2988 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY; 2989 default: 2990 assert(!"Unexpected resource type"); 2991 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 2992 } 2993 } 2994 2995 2996 /** 2997 * Given a tgsi_return_type, return true iff it is an integer type. 2998 */ 2999 static boolean 3000 is_integer_type(enum tgsi_return_type type) 3001 { 3002 switch (type) { 3003 case TGSI_RETURN_TYPE_SINT: 3004 case TGSI_RETURN_TYPE_UINT: 3005 return TRUE; 3006 case TGSI_RETURN_TYPE_FLOAT: 3007 case TGSI_RETURN_TYPE_UNORM: 3008 case TGSI_RETURN_TYPE_SNORM: 3009 return FALSE; 3010 case TGSI_RETURN_TYPE_COUNT: 3011 default: 3012 assert(!"is_integer_type: Unknown tgsi_return_type"); 3013 return FALSE; 3014 } 3015 } 3016 3017 3018 /** 3019 * Emit declarations for resources. 3020 * XXX When we're sure that all TGSI shaders will be generated with 3021 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may 3022 * rework this code. 
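 *
 * The TGSI_RETURN_TYPE_x -> VGPU10_RETURN_TYPE_x conversion below relies on
 * each VGPU10 enum value being exactly one greater than the corresponding
 * TGSI value; the STATIC_ASSERTs guard that assumption.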
3023 */ 3024 static boolean 3025 emit_resource_declarations(struct svga_shader_emitter_v10 *emit) 3026 { 3027 unsigned i; 3028 3029 /* Emit resource decl for each sampler */ 3030 for (i = 0; i < emit->num_samplers; i++) { 3031 VGPU10OpcodeToken0 opcode0; 3032 VGPU10OperandToken0 operand0; 3033 VGPU10ResourceReturnTypeToken return_type; 3034 VGPU10_RESOURCE_RETURN_TYPE rt; 3035 3036 opcode0.value = 0; 3037 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; 3038 opcode0.resourceDimension = 3039 tgsi_texture_to_resource_dimension(emit->sampler_target[i], 3040 emit->key.tex[i].is_array); 3041 operand0.value = 0; 3042 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 3043 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 3044 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 3045 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3046 3047 #if 1 3048 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ 3049 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); 3050 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); 3051 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); 3052 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); 3053 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); 3054 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT); 3055 rt = emit->sampler_return_type[i] + 1; 3056 #else 3057 switch (emit->sampler_return_type[i]) { 3058 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; 3059 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; 3060 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; 3061 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; 3062 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; 3063 case TGSI_RETURN_TYPE_COUNT: 3064 default: 3065 rt = VGPU10_RETURN_TYPE_FLOAT; 3066 assert(!"emit_resource_declarations: Unknown tgsi_return_type"); 3067 } 3068 #endif 3069 3070 return_type.value = 0; 3071 return_type.component0 = rt; 3072 return_type.component1 = rt; 3073 return_type.component2 = rt; 3074 return_type.component3 = rt; 3075 3076 begin_emit_instruction(emit); 3077 emit_dword(emit, opcode0.value); 3078 emit_dword(emit, operand0.value); 3079 emit_dword(emit, i); 3080 emit_dword(emit, return_type.value); 3081 end_emit_instruction(emit); 3082 } 3083 3084 return TRUE; 3085 } 3086 3087 static void 3088 emit_instruction_op1(struct svga_shader_emitter_v10 *emit, 3089 unsigned opcode, 3090 const struct tgsi_full_dst_register *dst, 3091 const struct tgsi_full_src_register *src, 3092 boolean saturate) 3093 { 3094 begin_emit_instruction(emit); 3095 emit_opcode(emit, opcode, saturate); 3096 emit_dst_register(emit, dst); 3097 emit_src_register(emit, src); 3098 end_emit_instruction(emit); 3099 } 3100 3101 static void 3102 emit_instruction_op2(struct svga_shader_emitter_v10 *emit, 3103 unsigned opcode, 3104 const struct tgsi_full_dst_register *dst, 3105 const struct tgsi_full_src_register *src1, 3106 const struct tgsi_full_src_register *src2, 3107 boolean saturate) 3108 { 3109 begin_emit_instruction(emit); 3110 emit_opcode(emit, opcode, saturate); 3111 emit_dst_register(emit, dst); 3112 emit_src_register(emit, src1); 3113 emit_src_register(emit, src2); 3114 end_emit_instruction(emit); 3115 } 3116 3117 static void 3118 emit_instruction_op3(struct svga_shader_emitter_v10 *emit, 3119 unsigned opcode, 3120 const struct tgsi_full_dst_register *dst, 3121 const struct tgsi_full_src_register 
*src1, 3122 const struct tgsi_full_src_register *src2, 3123 const struct tgsi_full_src_register *src3, 3124 boolean saturate) 3125 { 3126 begin_emit_instruction(emit); 3127 emit_opcode(emit, opcode, saturate); 3128 emit_dst_register(emit, dst); 3129 emit_src_register(emit, src1); 3130 emit_src_register(emit, src2); 3131 emit_src_register(emit, src3); 3132 end_emit_instruction(emit); 3133 } 3134 3135 /** 3136 * Emit the actual clip distance instructions to be used for clipping 3137 * by copying the clip distance from the temporary registers to the 3138 * CLIPDIST registers written with the enabled planes mask. 3139 * Also copy the clip distance from the temporary to the clip distance 3140 * shadow copy register which will be referenced by the input shader. 3141 */ 3142 static void 3143 emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) 3144 { 3145 struct tgsi_full_src_register tmp_clip_dist_src; 3146 struct tgsi_full_dst_register clip_dist_dst; 3147 3148 unsigned i; 3149 unsigned clip_plane_enable = emit->key.clip_plane_enable; 3150 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; 3151 int num_written_clipdist = emit->info.num_written_clipdistance; 3152 3153 assert(emit->clip_dist_out_index != INVALID_INDEX); 3154 assert(emit->clip_dist_tmp_index != INVALID_INDEX); 3155 3156 /** 3157 * Temporarily reset the temporary clip dist register index so 3158 * that the copy to the real clip dist register will not 3159 * attempt to copy to the temporary register again 3160 */ 3161 emit->clip_dist_tmp_index = INVALID_INDEX; 3162 3163 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { 3164 3165 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); 3166 3167 /** 3168 * copy to the shadow copy for use by the varying variable and 3169 * stream output. All clip distances 3170 * will be written regardless of the enabled clipping planes. 3171 */ 3172 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 3173 emit->clip_dist_so_index + i); 3174 3175 /* MOV clip_dist_so, tmp_clip_dist */ 3176 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 3177 &tmp_clip_dist_src, FALSE); 3178 3179 /** 3180 * copy the clip distances for the enabled clipping planes 3181 * to the CLIPDIST registers for clipping 3182 */ 3183 if (clip_plane_enable & 0xf) { 3184 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 3185 emit->clip_dist_out_index + i); 3186 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf); 3187 3188 /* MOV CLIPDIST, tmp_clip_dist */ 3189 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 3190 &tmp_clip_dist_src, FALSE); 3191 } 3192 /* four clip planes per clip register */ 3193 clip_plane_enable >>= 4; 3194 } 3195 /** 3196 * set the temporary clip dist register index back to the 3197 * temporary index for the next vertex 3198 */ 3199 emit->clip_dist_tmp_index = clip_dist_tmp_index; 3200 } 3201 3202 /* Declare clip distance output registers for user-defined clip planes 3203 * or the TGSI_CLIPVERTEX output.
3204 */ 3205 static void 3206 emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) 3207 { 3208 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 3209 unsigned index = emit->num_outputs; 3210 unsigned plane_mask; 3211 3212 assert(emit->unit == PIPE_SHADER_VERTEX || 3213 emit->unit == PIPE_SHADER_GEOMETRY); 3214 assert(num_clip_planes <= 8); 3215 3216 if (emit->clip_mode != CLIP_LEGACY && 3217 emit->clip_mode != CLIP_VERTEX) { 3218 return; 3219 } 3220 3221 if (num_clip_planes == 0) 3222 return; 3223 3224 /* Declare one or two clip output registers. The number of components 3225 * in the mask reflects the number of clip planes. For example, if 5 3226 * clip planes are needed, we'll declare outputs similar to: 3227 * dcl_output_siv o2.xyzw, clip_distance 3228 * dcl_output_siv o3.x, clip_distance 3229 */ 3230 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */ 3231 3232 plane_mask = (1 << num_clip_planes) - 1; 3233 if (plane_mask & 0xf) { 3234 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 3235 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, 3236 VGPU10_NAME_CLIP_DISTANCE, cmask); 3237 emit->num_outputs++; 3238 } 3239 if (plane_mask & 0xf0) { 3240 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 3241 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, 3242 VGPU10_NAME_CLIP_DISTANCE, cmask); 3243 emit->num_outputs++; 3244 } 3245 } 3246 3247 3248 /** 3249 * Emit the instructions for writing to the clip distance registers 3250 * to handle legacy/automatic clip planes. 3251 * For each clip plane, the distance is the dot product of the vertex 3252 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients. 3253 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE 3254 * output registers already declared. 3255 */ 3256 static void 3257 emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, 3258 unsigned vpos_tmp_index) 3259 { 3260 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 3261 3262 assert(emit->clip_mode == CLIP_LEGACY); 3263 assert(num_clip_planes <= 8); 3264 3265 assert(emit->unit == PIPE_SHADER_VERTEX || 3266 emit->unit == PIPE_SHADER_GEOMETRY); 3267 3268 for (i = 0; i < num_clip_planes; i++) { 3269 struct tgsi_full_dst_register dst; 3270 struct tgsi_full_src_register plane_src, vpos_src; 3271 unsigned reg_index = emit->clip_dist_out_index + i / 4; 3272 unsigned comp = i % 4; 3273 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 3274 3275 /* create dst, src regs */ 3276 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 3277 dst = writemask_dst(&dst, writemask); 3278 3279 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 3280 vpos_src = make_src_temp_reg(vpos_tmp_index); 3281 3282 /* DP4 clip_dist, plane, vpos */ 3283 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 3284 &plane_src, &vpos_src, FALSE); 3285 } 3286 } 3287 3288 3289 /** 3290 * Emit the instructions for computing the clip distance results from 3291 * the clip vertex temporary. 3292 * For each clip plane, the distance is the dot product of the clip vertex 3293 * position (found in a temp reg) and the clip plane coefficients. 
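 * In effect, for each enabled plane i the code below emits roughly:
 *   DP4 CLIPDIST[i / 4].<component i % 4>, clip_plane_const[i], clip_vertex_tmp
 * followed by a MOV of the temporary clip vertex to the real clip vertex
 * output register.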
3294 */ 3295 static void 3296 emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) 3297 { 3298 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable); 3299 unsigned i; 3300 struct tgsi_full_dst_register dst; 3301 struct tgsi_full_src_register clipvert_src; 3302 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; 3303 3304 assert(emit->unit == PIPE_SHADER_VERTEX || 3305 emit->unit == PIPE_SHADER_GEOMETRY); 3306 3307 assert(emit->clip_mode == CLIP_VERTEX); 3308 3309 clipvert_src = make_src_temp_reg(clip_vertex_tmp); 3310 3311 for (i = 0; i < num_clip; i++) { 3312 struct tgsi_full_src_register plane_src; 3313 unsigned reg_index = emit->clip_dist_out_index + i / 4; 3314 unsigned comp = i % 4; 3315 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 3316 3317 /* create dst, src regs */ 3318 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 3319 dst = writemask_dst(&dst, writemask); 3320 3321 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 3322 3323 /* DP4 clip_dist, plane, clipvert */ 3324 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 3325 &plane_src, &clipvert_src, FALSE); 3326 } 3327 3328 /* copy temporary clip vertex register to the clip vertex register */ 3329 3330 assert(emit->clip_vertex_out_index != INVALID_INDEX); 3331 3332 /** 3333 * Temporarily reset the temporary clip vertex register index so 3334 * that the copy to the clip vertex register will not attempt 3335 * to copy to the temporary register again 3336 */ 3337 emit->clip_vertex_tmp_index = INVALID_INDEX; 3338 3339 /* MOV clip_vertex, clip_vertex_tmp */ 3340 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); 3341 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 3342 &dst, &clipvert_src, FALSE); 3343 3344 /** 3345 * set the temporary clip vertex register index back to the 3346 * temporary index for the next vertex 3347 */ 3348 emit->clip_vertex_tmp_index = clip_vertex_tmp; 3349 } 3350 3351 /** 3352 * Emit code to convert RGBA to BGRA 3353 */ 3354 static void 3355 emit_swap_r_b(struct svga_shader_emitter_v10 *emit, 3356 const struct tgsi_full_dst_register *dst, 3357 const struct tgsi_full_src_register *src) 3358 { 3359 struct tgsi_full_src_register bgra_src = 3360 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); 3361 3362 begin_emit_instruction(emit); 3363 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 3364 emit_dst_register(emit, dst); 3365 emit_src_register(emit, &bgra_src); 3366 end_emit_instruction(emit); 3367 } 3368 3369 3370 /** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ 3371 static void 3372 emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, 3373 const struct tgsi_full_dst_register *dst, 3374 const struct tgsi_full_src_register *src) 3375 { 3376 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); 3377 struct tgsi_full_src_register two = 3378 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); 3379 struct tgsi_full_src_register neg_two = 3380 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 3381 3382 unsigned val_tmp = get_temp_index(emit); 3383 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); 3384 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); 3385 3386 unsigned bias_tmp = get_temp_index(emit); 3387 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); 3388 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); 3389 3390 /* val = src * 2.0 */ 3391 emit_instruction_op2(emit,
VGPU10_OPCODE_MUL, &val_dst, 3392 src, &two, FALSE); 3393 3394 /* bias = src >= 0.5 */ 3395 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, 3396 src, &half, FALSE); 3397 3398 /* bias = bias & -2.0 */ 3399 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, 3400 &bias_src, &neg_two, FALSE); 3401 3402 /* dst = val + bias */ 3403 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, 3404 &val_src, &bias_src, FALSE); 3405 3406 free_temp_indexes(emit); 3407 } 3408 3409 3410 /** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ 3411 static void 3412 emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, 3413 const struct tgsi_full_dst_register *dst, 3414 const struct tgsi_full_src_register *src) 3415 { 3416 struct tgsi_full_src_register scale = 3417 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); 3418 3419 /* dst = src * scale */ 3420 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale, FALSE); 3421 } 3422 3423 3424 /** Convert from R32_UINT to 10_10_10_2_sscaled */ 3425 static void 3426 emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, 3427 const struct tgsi_full_dst_register *dst, 3428 const struct tgsi_full_src_register *src) 3429 { 3430 struct tgsi_full_src_register lshift = 3431 make_immediate_reg_int4(emit, 22, 12, 2, 0); 3432 struct tgsi_full_src_register rshift = 3433 make_immediate_reg_int4(emit, 22, 22, 22, 30); 3434 3435 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); 3436 3437 unsigned tmp = get_temp_index(emit); 3438 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3439 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3440 3441 /* 3442 * r = (pixel << 22) >> 22; # signed int in [-512, 511] 3443 * g = (pixel << 12) >> 22; # signed int in [-512, 511] 3444 * b = (pixel << 2) >> 22; # signed int in [-512, 511] 3445 * a = (pixel << 0) >> 30; # signed int in [-2, 1] 3446 * dst = i_to_f(r,g,b,a); # convert to float 3447 */ 3448 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, 3449 &src_xxxx, &lshift, FALSE); 3450 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, 3451 &tmp_src, &rshift, FALSE); 3452 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src, FALSE); 3453 3454 free_temp_indexes(emit); 3455 } 3456 3457 3458 /** 3459 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. 3460 */ 3461 static boolean 3462 emit_arl_uarl(struct svga_shader_emitter_v10 *emit, 3463 const struct tgsi_full_instruction *inst) 3464 { 3465 unsigned index = inst->Dst[0].Register.Index; 3466 struct tgsi_full_dst_register dst; 3467 unsigned opcode; 3468 3469 assert(index < MAX_VGPU10_ADDR_REGS); 3470 dst = make_dst_temp_reg(emit->address_reg_index[index]); 3471 3472 /* ARL dst, s0 3473 * Translates into: 3474 * FTOI address_tmp, s0 3475 * 3476 * UARL dst, s0 3477 * Translates into: 3478 * MOV address_tmp, s0 3479 */ 3480 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) 3481 opcode = VGPU10_OPCODE_FTOI; 3482 else 3483 opcode = VGPU10_OPCODE_MOV; 3484 3485 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0], FALSE); 3486 3487 return TRUE; 3488 } 3489 3490 3491 /** 3492 * Emit code for TGSI_OPCODE_CAL instruction.
3493 */ 3494 static boolean 3495 emit_cal(struct svga_shader_emitter_v10 *emit, 3496 const struct tgsi_full_instruction *inst) 3497 { 3498 unsigned label = inst->Label.Label; 3499 VGPU10OperandToken0 operand; 3500 operand.value = 0; 3501 operand.operandType = VGPU10_OPERAND_TYPE_LABEL; 3502 3503 begin_emit_instruction(emit); 3504 emit_dword(emit, operand.value); 3505 emit_dword(emit, label); 3506 end_emit_instruction(emit); 3507 3508 return TRUE; 3509 } 3510 3511 3512 /** 3513 * Emit code for TGSI_OPCODE_IABS instruction. 3514 */ 3515 static boolean 3516 emit_iabs(struct svga_shader_emitter_v10 *emit, 3517 const struct tgsi_full_instruction *inst) 3518 { 3519 /* dst.x = (src0.x < 0) ? -src0.x : src0.x 3520 * dst.y = (src0.y < 0) ? -src0.y : src0.y 3521 * dst.z = (src0.z < 0) ? -src0.z : src0.z 3522 * dst.w = (src0.w < 0) ? -src0.w : src0.w 3523 * 3524 * Translates into 3525 * IMAX dst, src, neg(src) 3526 */ 3527 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); 3528 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], 3529 &inst->Src[0], &neg_src, FALSE); 3530 3531 return TRUE; 3532 } 3533 3534 3535 /** 3536 * Emit code for TGSI_OPCODE_CMP instruction. 3537 */ 3538 static boolean 3539 emit_cmp(struct svga_shader_emitter_v10 *emit, 3540 const struct tgsi_full_instruction *inst) 3541 { 3542 /* dst.x = (src0.x < 0) ? src1.x : src2.x 3543 * dst.y = (src0.y < 0) ? src1.y : src2.y 3544 * dst.z = (src0.z < 0) ? src1.z : src2.z 3545 * dst.w = (src0.w < 0) ? src1.w : src2.w 3546 * 3547 * Translates into 3548 * LT tmp, src0, 0.0 3549 * MOVC dst, tmp, src1, src2 3550 */ 3551 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3552 unsigned tmp = get_temp_index(emit); 3553 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3554 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3555 3556 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, 3557 &inst->Src[0], &zero, FALSE); 3558 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], 3559 &tmp_src, &inst->Src[1], &inst->Src[2], 3560 inst->Instruction.Saturate); 3561 3562 free_temp_indexes(emit); 3563 3564 return TRUE; 3565 } 3566 3567 3568 /** 3569 * Emit code for TGSI_OPCODE_DP2A instruction. 
3570 */ 3571 static boolean 3572 emit_dp2a(struct svga_shader_emitter_v10 *emit, 3573 const struct tgsi_full_instruction *inst) 3574 { 3575 /* dst.x = src0.x * src1.x + src0.y * src1.y + src2.x 3576 * dst.y = src0.x * src1.x + src0.y * src1.y + src2.x 3577 * dst.z = src0.x * src1.x + src0.y * src1.y + src2.x 3578 * dst.w = src0.x * src1.x + src0.y * src1.y + src2.x 3579 * Translate into 3580 * MAD tmp.x, s0.y, s1.y, s2.x 3581 * MAD tmp.x, s0.x, s1.x, tmp.x 3582 * MOV dst.xyzw, tmp.xxxx 3583 */ 3584 unsigned tmp = get_temp_index(emit); 3585 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3586 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3587 3588 struct tgsi_full_src_register tmp_src_xxxx = 3589 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3590 struct tgsi_full_dst_register tmp_dst_x = 3591 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3592 3593 struct tgsi_full_src_register src0_xxxx = 3594 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 3595 struct tgsi_full_src_register src0_yyyy = 3596 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 3597 struct tgsi_full_src_register src1_xxxx = 3598 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 3599 struct tgsi_full_src_register src1_yyyy = 3600 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 3601 struct tgsi_full_src_register src2_xxxx = 3602 scalar_src(&inst->Src[2], TGSI_SWIZZLE_X); 3603 3604 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_yyyy, 3605 &src1_yyyy, &src2_xxxx, FALSE); 3606 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &tmp_dst_x, &src0_xxxx, 3607 &src1_xxxx, &tmp_src_xxxx, FALSE); 3608 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 3609 &tmp_src_xxxx, inst->Instruction.Saturate); 3610 3611 free_temp_indexes(emit); 3612 3613 return TRUE; 3614 } 3615 3616 3617 /** 3618 * Emit code for TGSI_OPCODE_DPH instruction. 3619 */ 3620 static boolean 3621 emit_dph(struct svga_shader_emitter_v10 *emit, 3622 const struct tgsi_full_instruction *inst) 3623 { 3624 /* 3625 * DP3 tmp, s0, s1 3626 * ADD dst, tmp, s1.wwww 3627 */ 3628 3629 struct tgsi_full_src_register s1_wwww = 3630 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, 3631 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 3632 3633 unsigned tmp = get_temp_index(emit); 3634 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3635 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3636 3637 /* DP3 tmp, s0, s1 */ 3638 emit_instruction_op2(emit, VGPU10_OPCODE_DP3, &tmp_dst, &inst->Src[0], 3639 &inst->Src[1], FALSE); 3640 3641 /* ADD dst, tmp, s1.wwww */ 3642 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &inst->Dst[0], &tmp_src, 3643 &s1_wwww, inst->Instruction.Saturate); 3644 3645 free_temp_indexes(emit); 3646 3647 return TRUE; 3648 } 3649 3650 3651 /** 3652 * Emit code for TGSI_OPCODE_DST instruction. 3653 */ 3654 static boolean 3655 emit_dst(struct svga_shader_emitter_v10 *emit, 3656 const struct tgsi_full_instruction *inst) 3657 { 3658 /* 3659 * dst.x = 1 3660 * dst.y = src0.y * src1.y 3661 * dst.z = src0.z 3662 * dst.w = src1.w 3663 */ 3664 3665 struct tgsi_full_src_register s0_yyyy = 3666 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 3667 struct tgsi_full_src_register s0_zzzz = 3668 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); 3669 struct tgsi_full_src_register s1_yyyy = 3670 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 3671 struct tgsi_full_src_register s1_wwww = 3672 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W); 3673 3674 /* 3675 * If dst and either src0 and src1 are the same we need 3676 * to create a temporary for it and insert a extra move. 
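* (Writing each channel straight into dst could clobber src0/src1
* components that the remaining channels still need, so the result is
* built in the temp and copied to dst at the end.)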
3677 */ 3678 unsigned tmp_move = get_temp_index(emit); 3679 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 3680 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 3681 3682 /* MOV dst.x, 1.0 */ 3683 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3684 struct tgsi_full_dst_register dst_x = 3685 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 3686 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 3687 3688 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); 3689 } 3690 3691 /* MUL dst.y, s0.y, s1.y */ 3692 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3693 struct tgsi_full_dst_register dst_y = 3694 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 3695 3696 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, 3697 &s1_yyyy, inst->Instruction.Saturate); 3698 } 3699 3700 /* MOV dst.z, s0.z */ 3701 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3702 struct tgsi_full_dst_register dst_z = 3703 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 3704 3705 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, &s0_zzzz, 3706 inst->Instruction.Saturate); 3707 } 3708 3709 /* MOV dst.w, s1.w */ 3710 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3711 struct tgsi_full_dst_register dst_w = 3712 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 3713 3714 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &s1_wwww, 3715 inst->Instruction.Saturate); 3716 } 3717 3718 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 3719 FALSE); 3720 free_temp_indexes(emit); 3721 3722 return TRUE; 3723 } 3724 3725 3726 3727 /** 3728 * Emit code for TGSI_OPCODE_ENDPRIM (GS only) 3729 */ 3730 static boolean 3731 emit_endprim(struct svga_shader_emitter_v10 *emit, 3732 const struct tgsi_full_instruction *inst) 3733 { 3734 assert(emit->unit == PIPE_SHADER_GEOMETRY); 3735 3736 /* We can't use emit_simple() because the TGSI instruction has one 3737 * operand (vertex stream number) which we must ignore for VGPU10. 3738 */ 3739 begin_emit_instruction(emit); 3740 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); 3741 end_emit_instruction(emit); 3742 return TRUE; 3743 } 3744 3745 3746 /** 3747 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. 3748 */ 3749 static boolean 3750 emit_ex2(struct svga_shader_emitter_v10 *emit, 3751 const struct tgsi_full_instruction *inst) 3752 { 3753 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x 3754 * while VGPU10 computes four values. 3755 * 3756 * dst = EX2(src): 3757 * dst.xyzw = 2.0 ^ src.x 3758 */ 3759 3760 struct tgsi_full_src_register src_xxxx = 3761 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 3762 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 3763 3764 /* EXP tmp, s0.xxxx */ 3765 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, 3766 inst->Instruction.Saturate); 3767 3768 return TRUE; 3769 } 3770 3771 3772 /** 3773 * Emit code for TGSI_OPCODE_EXP instruction. 
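*
* For illustration (hypothetical input), s0.x = 2.5 gives:
*   dst.x = 2^floor(2.5) = 4.0
*   dst.y = 2.5 - floor(2.5) = 0.5
*   dst.z = 2^2.5 ~= 5.657
*   dst.w = 1.0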
3774 */ 3775 static boolean 3776 emit_exp(struct svga_shader_emitter_v10 *emit, 3777 const struct tgsi_full_instruction *inst) 3778 { 3779 /* 3780 * dst.x = 2 ^ floor(s0.x) 3781 * dst.y = s0.x - floor(s0.x) 3782 * dst.z = 2 ^ s0.x 3783 * dst.w = 1.0 3784 */ 3785 3786 struct tgsi_full_src_register src_xxxx = 3787 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 3788 unsigned tmp = get_temp_index(emit); 3789 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3790 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3791 3792 /* 3793 * If dst and src are the same we need to create 3794 * a temporary for it and insert a extra move. 3795 */ 3796 unsigned tmp_move = get_temp_index(emit); 3797 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 3798 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 3799 3800 /* only use X component of temp reg */ 3801 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3802 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3803 3804 /* ROUND_NI tmp.x, s0.x */ 3805 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, 3806 &src_xxxx, FALSE); /* round to -infinity */ 3807 3808 /* EXP dst.x, tmp.x */ 3809 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3810 struct tgsi_full_dst_register dst_x = 3811 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 3812 3813 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, 3814 inst->Instruction.Saturate); 3815 } 3816 3817 /* ADD dst.y, s0.x, -tmp */ 3818 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3819 struct tgsi_full_dst_register dst_y = 3820 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 3821 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); 3822 3823 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, 3824 &neg_tmp_src, inst->Instruction.Saturate); 3825 } 3826 3827 /* EXP dst.z, s0.x */ 3828 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3829 struct tgsi_full_dst_register dst_z = 3830 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 3831 3832 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, 3833 inst->Instruction.Saturate); 3834 } 3835 3836 /* MOV dst.w, 1.0 */ 3837 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3838 struct tgsi_full_dst_register dst_w = 3839 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 3840 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 3841 3842 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, 3843 FALSE); 3844 } 3845 3846 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 3847 FALSE); 3848 3849 free_temp_indexes(emit); 3850 3851 return TRUE; 3852 } 3853 3854 3855 /** 3856 * Emit code for TGSI_OPCODE_IF instruction. 3857 */ 3858 static boolean 3859 emit_if(struct svga_shader_emitter_v10 *emit, 3860 const struct tgsi_full_instruction *inst) 3861 { 3862 VGPU10OpcodeToken0 opcode0; 3863 3864 /* The src register should be a scalar */ 3865 assert(inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleY && 3866 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleZ && 3867 inst->Src[0].Register.SwizzleX == inst->Src[0].Register.SwizzleW); 3868 3869 /* The only special thing here is that we need to set the 3870 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if 3871 * src.x is non-zero. 
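* (The VGPU10 IF token can test for either a zero or a non-zero
* condition via its testBoolean field; TGSI's IF takes the branch when
* the condition value is non-zero, so TEST_NONZERO is the matching
* choice.)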
3872 */ 3873 opcode0.value = 0; 3874 opcode0.opcodeType = VGPU10_OPCODE_IF; 3875 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; 3876 3877 begin_emit_instruction(emit); 3878 emit_dword(emit, opcode0.value); 3879 emit_src_register(emit, &inst->Src[0]); 3880 end_emit_instruction(emit); 3881 3882 return TRUE; 3883 } 3884 3885 3886 /** 3887 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of 3888 * the register components are negative). 3889 */ 3890 static boolean 3891 emit_kill_if(struct svga_shader_emitter_v10 *emit, 3892 const struct tgsi_full_instruction *inst) 3893 { 3894 unsigned tmp = get_temp_index(emit); 3895 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 3896 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 3897 3898 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3899 3900 struct tgsi_full_dst_register tmp_dst_x = 3901 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 3902 struct tgsi_full_src_register tmp_src_xxxx = 3903 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 3904 3905 /* tmp = src[0] < 0.0 */ 3906 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 3907 &zero, FALSE); 3908 3909 if (!same_swizzle_terms(&inst->Src[0])) { 3910 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to 3911 * logically OR the swizzle terms. Most uses of KILL_IF only 3912 * test one channel so it's good to avoid these extra steps. 3913 */ 3914 struct tgsi_full_src_register tmp_src_yyyy = 3915 scalar_src(&tmp_src, TGSI_SWIZZLE_Y); 3916 struct tgsi_full_src_register tmp_src_zzzz = 3917 scalar_src(&tmp_src, TGSI_SWIZZLE_Z); 3918 struct tgsi_full_src_register tmp_src_wwww = 3919 scalar_src(&tmp_src, TGSI_SWIZZLE_W); 3920 3921 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3922 &tmp_src_yyyy, FALSE); 3923 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3924 &tmp_src_zzzz, FALSE); 3925 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 3926 &tmp_src_wwww, FALSE); 3927 } 3928 3929 begin_emit_instruction(emit); 3930 emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ 3931 emit_src_register(emit, &tmp_src_xxxx); 3932 end_emit_instruction(emit); 3933 3934 free_temp_indexes(emit); 3935 3936 return TRUE; 3937 } 3938 3939 3940 /** 3941 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). 3942 */ 3943 static boolean 3944 emit_kill(struct svga_shader_emitter_v10 *emit, 3945 const struct tgsi_full_instruction *inst) 3946 { 3947 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 3948 3949 /* DISCARD if 0.0 is zero */ 3950 begin_emit_instruction(emit); 3951 emit_discard_opcode(emit, FALSE); 3952 emit_src_register(emit, &zero); 3953 end_emit_instruction(emit); 3954 3955 return TRUE; 3956 } 3957 3958 3959 /** 3960 * Emit code for TGSI_OPCODE_LG2 instruction. 3961 */ 3962 static boolean 3963 emit_lg2(struct svga_shader_emitter_v10 *emit, 3964 const struct tgsi_full_instruction *inst) 3965 { 3966 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x 3967 * while VGPU10 computes four values. 
3968 * 3969 * dst = LG2(src): 3970 * dst.xyzw = log2(src.x) 3971 */ 3972 3973 struct tgsi_full_src_register src_xxxx = 3974 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 3975 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 3976 3977 /* LOG tmp, s0.xxxx */ 3978 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &inst->Dst[0], &src_xxxx, 3979 inst->Instruction.Saturate); 3980 3981 return TRUE; 3982 } 3983 3984 3985 /** 3986 * Emit code for TGSI_OPCODE_LIT instruction. 3987 */ 3988 static boolean 3989 emit_lit(struct svga_shader_emitter_v10 *emit, 3990 const struct tgsi_full_instruction *inst) 3991 { 3992 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 3993 3994 /* 3995 * If dst and src are the same we need to create 3996 * a temporary for it and insert an extra move. 3997 */ 3998 unsigned tmp_move = get_temp_index(emit); 3999 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 4000 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 4001 4002 /* 4003 * dst.x = 1 4004 * dst.y = max(src.x, 0) 4005 * dst.z = (src.x > 0) ? max(src.y, 0) ^ clamp(src.w, -128, 128) : 0 4006 * dst.w = 1 4007 */ 4008 4009 /* MOV dst.x, 1.0 */ 4010 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4011 struct tgsi_full_dst_register dst_x = 4012 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 4013 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one, FALSE); 4014 } 4015 4016 /* MOV dst.w, 1.0 */ 4017 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4018 struct tgsi_full_dst_register dst_w = 4019 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 4020 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); 4021 } 4022 4023 /* MAX dst.y, src.x, 0.0 */ 4024 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4025 struct tgsi_full_dst_register dst_y = 4026 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 4027 struct tgsi_full_src_register zero = 4028 make_immediate_reg_float(emit, 0.0f); 4029 struct tgsi_full_src_register src_xxxx = 4030 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4031 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4032 4033 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, 4034 &zero, inst->Instruction.Saturate); 4035 } 4036 4037 /* 4038 * tmp1 = clamp(src.w, -128, 128); 4039 * MAX tmp1, src.w, -128 4040 * MIN tmp1, tmp1, 128 4041 * 4042 * tmp2 = max(src.y, 0); 4043 * MAX tmp2, src.y, 0 4044 * 4045 * tmp1 = pow(tmp2, tmp1); 4046 * LOG tmp2, tmp2 4047 * MUL tmp1, tmp2, tmp1 4048 * EXP tmp1, tmp1 4049 * 4050 * tmp1 = (src.w == 0) ? 1 : tmp1; 4051 * EQ tmp2, 0, src.w 4052 * MOVC tmp1, tmp2, 1.0, tmp1 4053 * 4054 * dst.z = (0 < src.x) ?
tmp1 : 0; 4055 * LT tmp2, 0, src.x 4056 * MOVC dst.z, tmp2, tmp1, 0.0 4057 */ 4058 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4059 struct tgsi_full_dst_register dst_z = 4060 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 4061 4062 unsigned tmp1 = get_temp_index(emit); 4063 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4064 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4065 unsigned tmp2 = get_temp_index(emit); 4066 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4067 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4068 4069 struct tgsi_full_src_register src_xxxx = 4070 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 4071 struct tgsi_full_src_register src_yyyy = 4072 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 4073 struct tgsi_full_src_register src_wwww = 4074 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 4075 4076 struct tgsi_full_src_register zero = 4077 make_immediate_reg_float(emit, 0.0f); 4078 struct tgsi_full_src_register lowerbound = 4079 make_immediate_reg_float(emit, -128.0f); 4080 struct tgsi_full_src_register upperbound = 4081 make_immediate_reg_float(emit, 128.0f); 4082 4083 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, 4084 &lowerbound, FALSE); 4085 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, 4086 &upperbound, FALSE); 4087 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, 4088 &zero, FALSE); 4089 4090 /* POW tmp1, tmp2, tmp1 */ 4091 /* LOG tmp2, tmp2 */ 4092 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src, 4093 FALSE); 4094 4095 /* MUL tmp1, tmp2, tmp1 */ 4096 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, 4097 &tmp1_src, FALSE); 4098 4099 /* EXP tmp1, tmp1 */ 4100 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src, 4101 FALSE); 4102 4103 /* EQ tmp2, 0, src.w */ 4104 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, 4105 &src_wwww, FALSE); 4106 /* MOVC tmp1, tmp2, 1.0, tmp1 */ 4107 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, 4108 &tmp2_src, &one, &tmp1_src, FALSE); 4109 4110 /* LT tmp2, 0, src.x */ 4111 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, 4112 &src_xxxx, FALSE); 4113 /* MOVC dst.z, tmp2, tmp1, 0.0 */ 4114 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, 4115 &tmp2_src, &tmp1_src, &zero, FALSE); 4116 } 4117 4118 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src, 4119 FALSE); 4120 free_temp_indexes(emit); 4121 4122 return TRUE; 4123 } 4124 4125 4126 /** 4127 * Emit code for TGSI_OPCODE_LOG instruction.
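*
* For illustration (hypothetical input), s0.x = 8.5 gives:
*   dst.x = floor(log2(8.5)) = 3.0
*   dst.y = 8.5 / 2^3 = 1.0625
*   dst.z = log2(8.5) ~= 3.09
*   dst.w = 1.0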
4128 */ 4129 static boolean 4130 emit_log(struct svga_shader_emitter_v10 *emit, 4131 const struct tgsi_full_instruction *inst) 4132 { 4133 /* 4134 * dst.x = floor(lg2(abs(s0.x))) 4135 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) 4136 * dst.z = lg2(abs(s0.x)) 4137 * dst.w = 1.0 4138 */ 4139 4140 struct tgsi_full_src_register src_xxxx = 4141 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 4142 unsigned tmp = get_temp_index(emit); 4143 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4144 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4145 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); 4146 4147 /* only use X component of temp reg */ 4148 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4149 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4150 4151 /* LOG tmp.x, abs(s0.x) */ 4152 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 4153 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, 4154 &abs_src_xxxx, FALSE); 4155 } 4156 4157 /* MOV dst.z, tmp.x */ 4158 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 4159 struct tgsi_full_dst_register dst_z = 4160 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); 4161 4162 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_z, 4163 &tmp_src, inst->Instruction.Saturate); 4164 } 4165 4166 /* FLR tmp.x, tmp.x */ 4167 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { 4168 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, 4169 &tmp_src, FALSE); 4170 } 4171 4172 /* MOV dst.x, tmp.x */ 4173 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4174 struct tgsi_full_dst_register dst_x = 4175 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 4176 4177 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &tmp_src, 4178 inst->Instruction.Saturate); 4179 } 4180 4181 /* EXP tmp.x, tmp.x */ 4182 /* DIV dst.y, abs(s0.x), tmp.x */ 4183 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 4184 struct tgsi_full_dst_register dst_y = 4185 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 4186 4187 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src, 4188 FALSE); 4189 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, 4190 &tmp_src, inst->Instruction.Saturate); 4191 } 4192 4193 /* MOV dst.w, 1.0 */ 4194 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 4195 struct tgsi_full_dst_register dst_w = 4196 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); 4197 struct tgsi_full_src_register one = 4198 make_immediate_reg_float(emit, 1.0f); 4199 4200 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one, FALSE); 4201 } 4202 4203 free_temp_indexes(emit); 4204 4205 return TRUE; 4206 } 4207 4208 4209 /** 4210 * Emit code for TGSI_OPCODE_LRP instruction. 
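*
* Note that s0 * (s1 - s2) + s2 is algebraically the same as
* s0 * s1 + (1 - s0) * s2, i.e. a linear interpolation which yields s2
* at s0 = 0 and s1 at s0 = 1.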
4211 */ 4212 static boolean 4213 emit_lrp(struct svga_shader_emitter_v10 *emit, 4214 const struct tgsi_full_instruction *inst) 4215 { 4216 /* dst = LRP(s0, s1, s2): 4217 * dst = s0 * (s1 - s2) + s2 4218 * Translates into: 4219 * SUB tmp, s1, s2; tmp = s1 - s2 4220 * MAD dst, s0, tmp, s2; dst = s0 * tmp + s2 4221 */ 4222 unsigned tmp = get_temp_index(emit); 4223 struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp); 4224 struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp); 4225 struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]); 4226 4227 /* ADD tmp, s1, -s2 */ 4228 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &dst_tmp, 4229 &inst->Src[1], &neg_src2, FALSE); 4230 4231 /* MAD dst, s0, tmp, s2 */ 4232 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &inst->Dst[0], 4233 &inst->Src[0], &src_tmp, &inst->Src[2], 4234 inst->Instruction.Saturate); 4235 4236 free_temp_indexes(emit); 4237 4238 return TRUE; 4239 } 4240 4241 4242 /** 4243 * Emit code for TGSI_OPCODE_POW instruction. 4244 */ 4245 static boolean 4246 emit_pow(struct svga_shader_emitter_v10 *emit, 4247 const struct tgsi_full_instruction *inst) 4248 { 4249 /* Note that TGSI_OPCODE_POW computes only one value from src0.x and 4250 * src1.x while VGPU10 computes four values. 4251 * 4252 * dst = POW(src0, src1): 4253 * dst.xyzw = src0.x ^ src1.x 4254 */ 4255 unsigned tmp = get_temp_index(emit); 4256 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4257 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4258 struct tgsi_full_src_register src0_xxxx = 4259 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4260 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4261 struct tgsi_full_src_register src1_xxxx = 4262 swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 4263 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 4264 4265 /* LOG tmp, s0.xxxx */ 4266 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &src0_xxxx, 4267 FALSE); 4268 4269 /* MUL tmp, tmp, s1.xxxx */ 4270 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, &tmp_src, 4271 &src1_xxxx, FALSE); 4272 4273 /* EXP dst, tmp */ 4274 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], 4275 &tmp_src, inst->Instruction.Saturate); 4276 4277 /* free tmp */ 4278 free_temp_indexes(emit); 4279 4280 return TRUE; 4281 } 4282 4283 4284 /** 4285 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction. 4286 */ 4287 static boolean 4288 emit_rcp(struct svga_shader_emitter_v10 *emit, 4289 const struct tgsi_full_instruction *inst) 4290 { 4291 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4292 4293 unsigned tmp = get_temp_index(emit); 4294 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4295 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4296 4297 struct tgsi_full_dst_register tmp_dst_x = 4298 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4299 struct tgsi_full_src_register tmp_src_xxxx = 4300 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4301 4302 /* DIV tmp.x, 1.0, s0 */ 4303 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst_x, &one, 4304 &inst->Src[0], FALSE); 4305 4306 /* MOV dst, tmp.xxxx */ 4307 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4308 &tmp_src_xxxx, inst->Instruction.Saturate); 4309 4310 free_temp_indexes(emit); 4311 4312 return TRUE; 4313 } 4314 4315 4316 /** 4317 * Emit code for TGSI_OPCODE_RSQ instruction.
4318 */ 4319 static boolean 4320 emit_rsq(struct svga_shader_emitter_v10 *emit, 4321 const struct tgsi_full_instruction *inst) 4322 { 4323 /* dst = RSQ(src): 4324 * dst.xyzw = 1 / sqrt(src.x) 4325 * Translates into: 4326 * RSQ tmp, src.x 4327 * MOV dst, tmp.xxxx 4328 */ 4329 4330 unsigned tmp = get_temp_index(emit); 4331 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4332 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4333 4334 struct tgsi_full_dst_register tmp_dst_x = 4335 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4336 struct tgsi_full_src_register tmp_src_xxxx = 4337 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4338 4339 /* RSQ tmp, src.x */ 4340 emit_instruction_op1(emit, VGPU10_OPCODE_RSQ, &tmp_dst_x, 4341 &inst->Src[0], FALSE); 4342 4343 /* MOV dst, tmp.xxxx */ 4344 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4345 &tmp_src_xxxx, inst->Instruction.Saturate); 4346 4347 /* free tmp */ 4348 free_temp_indexes(emit); 4349 4350 return TRUE; 4351 } 4352 4353 4354 /** 4355 * Emit code for TGSI_OPCODE_SCS instruction. 4356 */ 4357 static boolean 4358 emit_scs(struct svga_shader_emitter_v10 *emit, 4359 const struct tgsi_full_instruction *inst) 4360 { 4361 /* dst.x = cos(src.x) 4362 * dst.y = sin(src.x) 4363 * dst.z = 0.0 4364 * dst.w = 1.0 4365 */ 4366 struct tgsi_full_dst_register dst_x = 4367 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 4368 struct tgsi_full_dst_register dst_y = 4369 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 4370 struct tgsi_full_dst_register dst_zw = 4371 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_ZW); 4372 4373 struct tgsi_full_src_register zero_one = 4374 make_immediate_reg_float4(emit, 0.0f, 0.0f, 0.0f, 1.0f); 4375 4376 begin_emit_instruction(emit); 4377 emit_opcode(emit, VGPU10_OPCODE_SINCOS, inst->Instruction.Saturate); 4378 emit_dst_register(emit, &dst_y); 4379 emit_dst_register(emit, &dst_x); 4380 emit_src_register(emit, &inst->Src[0]); 4381 end_emit_instruction(emit); 4382 4383 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 4384 &dst_zw, &zero_one, inst->Instruction.Saturate); 4385 4386 return TRUE; 4387 } 4388 4389 4390 /** 4391 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. 4392 */ 4393 static boolean 4394 emit_seq(struct svga_shader_emitter_v10 *emit, 4395 const struct tgsi_full_instruction *inst) 4396 { 4397 /* dst = SEQ(s0, s1): 4398 * dst = s0 == s1 ? 1.0 : 0.0 (per component) 4399 * Translates into: 4400 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4401 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4402 */ 4403 unsigned tmp = get_temp_index(emit); 4404 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4405 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4406 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4407 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4408 4409 /* EQ tmp, s0, s1 */ 4410 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], 4411 &inst->Src[1], FALSE); 4412 4413 /* MOVC dst, tmp, one, zero */ 4414 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4415 &one, &zero, FALSE); 4416 4417 free_temp_indexes(emit); 4418 4419 return TRUE; 4420 } 4421 4422 4423 /** 4424 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. 4425 */ 4426 static boolean 4427 emit_sge(struct svga_shader_emitter_v10 *emit, 4428 const struct tgsi_full_instruction *inst) 4429 { 4430 /* dst = SGE(s0, s1): 4431 * dst = s0 >= s1 ? 
1.0 : 0.0 (per component) 4432 * Translates into: 4433 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) 4434 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4435 */ 4436 unsigned tmp = get_temp_index(emit); 4437 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4438 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4439 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4440 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4441 4442 /* GE tmp, s0, s1 */ 4443 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], 4444 &inst->Src[1], FALSE); 4445 4446 /* MOVC dst, tmp, one, zero */ 4447 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4448 &one, &zero, FALSE); 4449 4450 free_temp_indexes(emit); 4451 4452 return TRUE; 4453 } 4454 4455 4456 /** 4457 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. 4458 */ 4459 static boolean 4460 emit_sgt(struct svga_shader_emitter_v10 *emit, 4461 const struct tgsi_full_instruction *inst) 4462 { 4463 /* dst = SGT(s0, s1): 4464 * dst = s0 > s1 ? 1.0 : 0.0 (per component) 4465 * Translates into: 4466 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) 4467 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4468 */ 4469 unsigned tmp = get_temp_index(emit); 4470 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4471 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4472 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4473 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4474 4475 /* LT tmp, s1, s0 */ 4476 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], 4477 &inst->Src[0], FALSE); 4478 4479 /* MOVC dst, tmp, one, zero */ 4480 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4481 &one, &zero, FALSE); 4482 4483 free_temp_indexes(emit); 4484 4485 return TRUE; 4486 } 4487 4488 4489 /** 4490 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. 4491 */ 4492 static boolean 4493 emit_sincos(struct svga_shader_emitter_v10 *emit, 4494 const struct tgsi_full_instruction *inst) 4495 { 4496 unsigned tmp = get_temp_index(emit); 4497 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4498 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4499 4500 struct tgsi_full_src_register tmp_src_xxxx = 4501 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 4502 struct tgsi_full_dst_register tmp_dst_x = 4503 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 4504 4505 begin_emit_instruction(emit); 4506 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); 4507 4508 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) 4509 { 4510 emit_dst_register(emit, &tmp_dst_x); /* first destination register */ 4511 emit_null_dst_register(emit); /* second destination register */ 4512 } 4513 else { 4514 emit_null_dst_register(emit); 4515 emit_dst_register(emit, &tmp_dst_x); 4516 } 4517 4518 emit_src_register(emit, &inst->Src[0]); 4519 end_emit_instruction(emit); 4520 4521 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 4522 &tmp_src_xxxx, inst->Instruction.Saturate); 4523 4524 free_temp_indexes(emit); 4525 4526 return TRUE; 4527 } 4528 4529 4530 /** 4531 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. 
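*
* Like the other TGSI SET opcodes this uses the two-step pattern of a
* VGPU10 comparison producing a 0 / 0xffffffff mask followed by a MOVC
* selecting 1.0 or 0.0.  Since VGPU10 only provides LT/GE/EQ/NE, SLE is
* expressed as GE with the source operands swapped (s0 <= s1 is the
* same as s1 >= s0).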
4532 */ 4533 static boolean 4534 emit_sle(struct svga_shader_emitter_v10 *emit, 4535 const struct tgsi_full_instruction *inst) 4536 { 4537 /* dst = SLE(s0, s1): 4538 * dst = s0 <= s1 ? 1.0 : 0.0 (per component) 4539 * Translates into: 4540 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) 4541 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4542 */ 4543 unsigned tmp = get_temp_index(emit); 4544 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4545 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4546 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4547 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4548 4549 /* GE tmp, s1, s0 */ 4550 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], 4551 &inst->Src[0], FALSE); 4552 4553 /* MOVC dst, tmp, one, zero */ 4554 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4555 &one, &zero, FALSE); 4556 4557 free_temp_indexes(emit); 4558 4559 return TRUE; 4560 } 4561 4562 4563 /** 4564 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. 4565 */ 4566 static boolean 4567 emit_slt(struct svga_shader_emitter_v10 *emit, 4568 const struct tgsi_full_instruction *inst) 4569 { 4570 /* dst = SLT(s0, s1): 4571 * dst = s0 < s1 ? 1.0 : 0.0 (per component) 4572 * Translates into: 4573 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) 4574 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4575 */ 4576 unsigned tmp = get_temp_index(emit); 4577 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4578 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4579 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4580 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4581 4582 /* LT tmp, s0, s1 */ 4583 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], 4584 &inst->Src[1], FALSE); 4585 4586 /* MOVC dst, tmp, one, zero */ 4587 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4588 &one, &zero, FALSE); 4589 4590 free_temp_indexes(emit); 4591 4592 return TRUE; 4593 } 4594 4595 4596 /** 4597 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction. 4598 */ 4599 static boolean 4600 emit_sne(struct svga_shader_emitter_v10 *emit, 4601 const struct tgsi_full_instruction *inst) 4602 { 4603 /* dst = SNE(s0, s1): 4604 * dst = s0 != s1 ? 1.0 : 0.0 (per component) 4605 * Translates into: 4606 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 4607 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 4608 */ 4609 unsigned tmp = get_temp_index(emit); 4610 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4611 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4612 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4613 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 4614 4615 /* NE tmp, s0, s1 */ 4616 emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0], 4617 &inst->Src[1], FALSE); 4618 4619 /* MOVC dst, tmp, one, zero */ 4620 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 4621 &one, &zero, FALSE); 4622 4623 free_temp_indexes(emit); 4624 4625 return TRUE; 4626 } 4627 4628 4629 /** 4630 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction. 
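*
* For illustration (hypothetical input): src = (-3.0, 0.0, 5.0, 0.25)
* gives dst = (-1.0, 0.0, 1.0, 1.0).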
4631 */ 4632 static boolean 4633 emit_ssg(struct svga_shader_emitter_v10 *emit, 4634 const struct tgsi_full_instruction *inst) 4635 { 4636 /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0 4637 * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0 4638 * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0 4639 * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0 4640 * Translates into: 4641 * LT tmp1, src, zero; tmp1 = src < zero ? 0xffffffff : 0 (per comp) 4642 * MOVC tmp2, tmp1, -1.0, 0.0; tmp2 = tmp1 ? -1.0 : 0.0 (per component) 4643 * LT tmp1, zero, src; tmp1 = zero < src ? 0xffffffff : 0 (per comp) 4644 * MOVC dst, tmp1, 1.0, tmp2; dst = tmp1 ? 1.0 : tmp2 (per component) 4645 */ 4646 struct tgsi_full_src_register zero = 4647 make_immediate_reg_float(emit, 0.0f); 4648 struct tgsi_full_src_register one = 4649 make_immediate_reg_float(emit, 1.0f); 4650 struct tgsi_full_src_register neg_one = 4651 make_immediate_reg_float(emit, -1.0f); 4652 4653 unsigned tmp1 = get_temp_index(emit); 4654 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4655 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4656 4657 unsigned tmp2 = get_temp_index(emit); 4658 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4659 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4660 4661 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], 4662 &zero, FALSE); 4663 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, 4664 &neg_one, &zero, FALSE); 4665 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, 4666 &inst->Src[0], FALSE); 4667 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, 4668 &one, &tmp2_src, FALSE); 4669 4670 free_temp_indexes(emit); 4671 4672 return TRUE; 4673 } 4674 4675 4676 /** 4677 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 4678 */ 4679 static boolean 4680 emit_issg(struct svga_shader_emitter_v10 *emit, 4681 const struct tgsi_full_instruction *inst) 4682 { 4683 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 4684 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 4685 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 4686 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 4687 * Translates into: 4688 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) 4689 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component) 4690 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) 4691 */ 4692 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 4693 4694 unsigned tmp1 = get_temp_index(emit); 4695 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 4696 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 4697 4698 unsigned tmp2 = get_temp_index(emit); 4699 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 4700 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 4701 4702 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); 4703 4704 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, 4705 &inst->Src[0], &zero, FALSE); 4706 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, 4707 &zero, &inst->Src[0], FALSE); 4708 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], 4709 &tmp1_src, &neg_tmp2, FALSE); 4710 4711 free_temp_indexes(emit); 4712 4713 return TRUE; 4714 } 4715 4716 4717 /** 4718 * Emit a comparison instruction. 
The dest register will get 4719 * 0 or ~0 values depending on the outcome of comparing src0 to src1. 4720 */ 4721 static void 4722 emit_comparison(struct svga_shader_emitter_v10 *emit, 4723 SVGA3dCmpFunc func, 4724 const struct tgsi_full_dst_register *dst, 4725 const struct tgsi_full_src_register *src0, 4726 const struct tgsi_full_src_register *src1) 4727 { 4728 struct tgsi_full_src_register immediate; 4729 VGPU10OpcodeToken0 opcode0; 4730 boolean swapSrc = FALSE; 4731 4732 /* Sanity checks for svga vs. gallium enums */ 4733 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); 4734 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); 4735 4736 opcode0.value = 0; 4737 4738 switch (func) { 4739 case SVGA3D_CMP_NEVER: 4740 immediate = make_immediate_reg_int(emit, 0); 4741 /* MOV dst, {0} */ 4742 begin_emit_instruction(emit); 4743 emit_dword(emit, VGPU10_OPCODE_MOV); 4744 emit_dst_register(emit, dst); 4745 emit_src_register(emit, &immediate); 4746 end_emit_instruction(emit); 4747 return; 4748 case SVGA3D_CMP_ALWAYS: 4749 immediate = make_immediate_reg_int(emit, -1); 4750 /* MOV dst, {-1} */ 4751 begin_emit_instruction(emit); 4752 emit_dword(emit, VGPU10_OPCODE_MOV); 4753 emit_dst_register(emit, dst); 4754 emit_src_register(emit, &immediate); 4755 end_emit_instruction(emit); 4756 return; 4757 case SVGA3D_CMP_LESS: 4758 opcode0.opcodeType = VGPU10_OPCODE_LT; 4759 break; 4760 case SVGA3D_CMP_EQUAL: 4761 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4762 break; 4763 case SVGA3D_CMP_LESSEQUAL: 4764 opcode0.opcodeType = VGPU10_OPCODE_GE; 4765 swapSrc = TRUE; 4766 break; 4767 case SVGA3D_CMP_GREATER: 4768 opcode0.opcodeType = VGPU10_OPCODE_LT; 4769 swapSrc = TRUE; 4770 break; 4771 case SVGA3D_CMP_NOTEQUAL: 4772 opcode0.opcodeType = VGPU10_OPCODE_NE; 4773 break; 4774 case SVGA3D_CMP_GREATEREQUAL: 4775 opcode0.opcodeType = VGPU10_OPCODE_GE; 4776 break; 4777 default: 4778 assert(!"Unexpected comparison mode"); 4779 opcode0.opcodeType = VGPU10_OPCODE_EQ; 4780 } 4781 4782 begin_emit_instruction(emit); 4783 emit_dword(emit, opcode0.value); 4784 emit_dst_register(emit, dst); 4785 if (swapSrc) { 4786 emit_src_register(emit, src1); 4787 emit_src_register(emit, src0); 4788 } 4789 else { 4790 emit_src_register(emit, src0); 4791 emit_src_register(emit, src1); 4792 } 4793 end_emit_instruction(emit); 4794 } 4795 4796 4797 /** 4798 * Get texel/address offsets for a texture instruction. 4799 */ 4800 static void 4801 get_texel_offsets(const struct svga_shader_emitter_v10 *emit, 4802 const struct tgsi_full_instruction *inst, int offsets[3]) 4803 { 4804 if (inst->Texture.NumOffsets == 1) { 4805 /* According to OpenGL Shader Language spec the offsets are only 4806 * fetched from a previously-declared immediate/literal. 4807 */ 4808 const struct tgsi_texture_offset *off = inst->TexOffsets; 4809 const unsigned index = off[0].Index; 4810 const unsigned swizzleX = off[0].SwizzleX; 4811 const unsigned swizzleY = off[0].SwizzleY; 4812 const unsigned swizzleZ = off[0].SwizzleZ; 4813 const union tgsi_immediate_data *imm = emit->immediates[index]; 4814 4815 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); 4816 4817 offsets[0] = imm[swizzleX].Int; 4818 offsets[1] = imm[swizzleY].Int; 4819 offsets[2] = imm[swizzleZ].Int; 4820 } 4821 else { 4822 offsets[0] = offsets[1] = offsets[2] = 0; 4823 } 4824 } 4825 4826 4827 /** 4828 * Set up the coordinate register for texture sampling. 4829 * When we're sampling from a RECT texture we have to scale the 4830 * unnormalized coordinate to a normalized coordinate. 
4831 * We do that by multiplying the coordinate by an "extra" constant. 4832 * An alternative would be to use the RESINFO instruction to query the 4833 * texture's size. 4834 */ 4835 static struct tgsi_full_src_register 4836 setup_texcoord(struct svga_shader_emitter_v10 *emit, 4837 unsigned unit, 4838 const struct tgsi_full_src_register *coord) 4839 { 4840 if (emit->key.tex[unit].unnormalized) { 4841 unsigned scale_index = emit->texcoord_scale_index[unit]; 4842 unsigned tmp = get_temp_index(emit); 4843 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 4844 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 4845 struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index); 4846 4847 /* MUL tmp, coord, const[] */ 4848 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst, 4849 coord, &scale_src, FALSE); 4850 return tmp_src; 4851 } 4852 else { 4853 /* use texcoord as-is */ 4854 return *coord; 4855 } 4856 } 4857 4858 4859 /** 4860 * For SAMPLE_C instructions, emit the extra src register which indicates 4861 * the reference/comparison value. 4862 */ 4863 static void 4864 emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit, 4865 unsigned target, 4866 const struct tgsi_full_src_register *coord) 4867 { 4868 struct tgsi_full_src_register coord_src_ref; 4869 unsigned component; 4870 4871 assert(tgsi_is_shadow_target(target)); 4872 4873 assert(target != TGSI_TEXTURE_SHADOWCUBE_ARRAY); /* XXX not implemented */ 4874 if (target == TGSI_TEXTURE_SHADOW2D_ARRAY || 4875 target == TGSI_TEXTURE_SHADOWCUBE) 4876 component = TGSI_SWIZZLE_W; 4877 else 4878 component = TGSI_SWIZZLE_Z; 4879 4880 coord_src_ref = scalar_src(coord, component); 4881 4882 emit_src_register(emit, &coord_src_ref); 4883 } 4884 4885 4886 /** 4887 * Info for implementing texture swizzles. 4888 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle() 4889 * functions use this to encapsulate the extra steps needed to perform 4890 * a texture swizzle, or shadow/depth comparisons. 4891 * The shadow/depth comparison is only done here for the cases where 4892 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare). 4893 */ 4894 struct tex_swizzle_info 4895 { 4896 boolean swizzled; 4897 boolean shadow_compare; 4898 unsigned unit; 4899 unsigned texture_target; /**< TGSI_TEXTURE_x */ 4900 struct tgsi_full_src_register tmp_src; 4901 struct tgsi_full_dst_register tmp_dst; 4902 const struct tgsi_full_dst_register *inst_dst; 4903 const struct tgsi_full_src_register *coord_src; 4904 }; 4905 4906 4907 /** 4908 * Do setup for handling texture swizzles or shadow compares. 4909 * \param unit the texture unit 4910 * \param inst the TGSI texture instruction 4911 * \param shadow_compare do shadow/depth comparison?
4912 * \param swz returns the swizzle info 4913 */ 4914 static void 4915 begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, 4916 unsigned unit, 4917 const struct tgsi_full_instruction *inst, 4918 boolean shadow_compare, 4919 struct tex_swizzle_info *swz) 4920 { 4921 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || 4922 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || 4923 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || 4924 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); 4925 4926 swz->shadow_compare = shadow_compare; 4927 swz->texture_target = inst->Texture.Texture; 4928 4929 if (swz->swizzled || shadow_compare) { 4930 /* Allocate temp register for the result of the SAMPLE instruction 4931 * and the source of the MOV/compare/swizzle instructions. 4932 */ 4933 unsigned tmp = get_temp_index(emit); 4934 swz->tmp_src = make_src_temp_reg(tmp); 4935 swz->tmp_dst = make_dst_temp_reg(tmp); 4936 4937 swz->unit = unit; 4938 } 4939 swz->inst_dst = &inst->Dst[0]; 4940 swz->coord_src = &inst->Src[0]; 4941 } 4942 4943 4944 /** 4945 * Returns the register to put the SAMPLE instruction results into. 4946 * This will either be the original instruction dst reg (if no swizzle 4947 * and no shadow comparison) or a temporary reg if there is a swizzle. 4948 */ 4949 static const struct tgsi_full_dst_register * 4950 get_tex_swizzle_dst(const struct tex_swizzle_info *swz) 4951 { 4952 return (swz->swizzled || swz->shadow_compare) 4953 ? &swz->tmp_dst : swz->inst_dst; 4954 } 4955 4956 4957 /** 4958 * This emits the MOV instruction that actually implements a texture swizzle 4959 * and/or shadow comparison. 4960 */ 4961 static void 4962 end_tex_swizzle(struct svga_shader_emitter_v10 *emit, 4963 const struct tex_swizzle_info *swz) 4964 { 4965 if (swz->shadow_compare) { 4966 /* Emit extra instructions to compare the fetched texel value against 4967 * a texture coordinate component. The result of the comparison 4968 * is 0.0 or 1.0. 4969 */ 4970 struct tgsi_full_src_register coord_src; 4971 struct tgsi_full_src_register texel_src = 4972 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); 4973 struct tgsi_full_src_register one = 4974 make_immediate_reg_float(emit, 1.0f); 4975 /* convert gallium comparison func to SVGA comparison func */ 4976 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; 4977 4978 assert(emit->unit == PIPE_SHADER_FRAGMENT); 4979 4980 switch (swz->texture_target) { 4981 case TGSI_TEXTURE_SHADOW2D: 4982 case TGSI_TEXTURE_SHADOWRECT: 4983 case TGSI_TEXTURE_SHADOW1D_ARRAY: 4984 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); 4985 break; 4986 case TGSI_TEXTURE_SHADOW1D: 4987 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Y); 4988 break; 4989 case TGSI_TEXTURE_SHADOWCUBE: 4990 case TGSI_TEXTURE_SHADOW2D_ARRAY: 4991 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_W); 4992 break; 4993 default: 4994 assert(!"Unexpected texture target in end_tex_swizzle()"); 4995 coord_src = scalar_src(swz->coord_src, TGSI_SWIZZLE_Z); 4996 } 4997 4998 /* COMPARE tmp, coord, texel */ 4999 /* XXX it would seem that the texel and coord arguments should 5000 * be transposed here, but piglit tests indicate otherwise. 
5001 */ 5002 emit_comparison(emit, compare_func, 5003 &swz->tmp_dst, &texel_src, &coord_src); 5004 5005 /* AND dest, tmp, {1.0} */ 5006 begin_emit_instruction(emit); 5007 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); 5008 if (swz->swizzled) { 5009 emit_dst_register(emit, &swz->tmp_dst); 5010 } 5011 else { 5012 emit_dst_register(emit, swz->inst_dst); 5013 } 5014 emit_src_register(emit, &swz->tmp_src); 5015 emit_src_register(emit, &one); 5016 end_emit_instruction(emit); 5017 } 5018 5019 if (swz->swizzled) { 5020 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; 5021 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; 5022 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; 5023 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; 5024 unsigned writemask_0 = 0, writemask_1 = 0; 5025 boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); 5026 5027 /* Swizzle w/out zero/one terms */ 5028 struct tgsi_full_src_register src_swizzled = 5029 swizzle_src(&swz->tmp_src, 5030 swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X, 5031 swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y, 5032 swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z, 5033 swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W); 5034 5035 /* MOV dst, color(tmp).<swizzle> */ 5036 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 5037 swz->inst_dst, &src_swizzled, FALSE); 5038 5039 /* handle swizzle zero terms */ 5040 writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) | 5041 ((swz_g == PIPE_SWIZZLE_0) << 1) | 5042 ((swz_b == PIPE_SWIZZLE_0) << 2) | 5043 ((swz_a == PIPE_SWIZZLE_0) << 3)); 5044 5045 if (writemask_0) { 5046 struct tgsi_full_src_register zero = int_tex ? 5047 make_immediate_reg_int(emit, 0) : 5048 make_immediate_reg_float(emit, 0.0f); 5049 struct tgsi_full_dst_register dst = 5050 writemask_dst(swz->inst_dst, writemask_0); 5051 5052 /* MOV dst.writemask_0, {0,0,0,0} */ 5053 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 5054 &dst, &zero, FALSE); 5055 } 5056 5057 /* handle swizzle one terms */ 5058 writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) | 5059 ((swz_g == PIPE_SWIZZLE_1) << 1) | 5060 ((swz_b == PIPE_SWIZZLE_1) << 2) | 5061 ((swz_a == PIPE_SWIZZLE_1) << 3)); 5062 5063 if (writemask_1) { 5064 struct tgsi_full_src_register one = int_tex ? 5065 make_immediate_reg_int(emit, 1) : 5066 make_immediate_reg_float(emit, 1.0f); 5067 struct tgsi_full_dst_register dst = 5068 writemask_dst(swz->inst_dst, writemask_1); 5069 5070 /* MOV dst.writemask_1, {1,1,1,1} */ 5071 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one, FALSE); 5072 } 5073 } 5074 } 5075 5076 5077 /** 5078 * Emit code for TGSI_OPCODE_SAMPLE instruction. 
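*
* Unlike the legacy TGSI_OPCODE_TEX path below, SAMPLE carries separate
* resource (Src[1]) and sampler (Src[2]) operands, so the resource unit
* and the sampler unit may differ.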
5079 */ 5080 static boolean 5081 emit_sample(struct svga_shader_emitter_v10 *emit, 5082 const struct tgsi_full_instruction *inst) 5083 { 5084 const unsigned resource_unit = inst->Src[1].Register.Index; 5085 const unsigned sampler_unit = inst->Src[2].Register.Index; 5086 struct tgsi_full_src_register coord; 5087 int offsets[3]; 5088 struct tex_swizzle_info swz_info; 5089 5090 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); 5091 5092 get_texel_offsets(emit, inst, offsets); 5093 5094 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); 5095 5096 /* SAMPLE dst, coord(s0), resource, sampler */ 5097 begin_emit_instruction(emit); 5098 5099 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, 5100 inst->Instruction.Saturate, offsets); 5101 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5102 emit_src_register(emit, &coord); 5103 emit_resource_register(emit, resource_unit); 5104 emit_sampler_register(emit, sampler_unit); 5105 end_emit_instruction(emit); 5106 5107 end_tex_swizzle(emit, &swz_info); 5108 5109 free_temp_indexes(emit); 5110 5111 return TRUE; 5112 } 5113 5114 5115 /** 5116 * Check if a texture instruction is valid. 5117 * An example of an invalid texture instruction is doing shadow comparison 5118 * with an integer-valued texture. 5119 * If we detect an invalid texture instruction, we replace it with: 5120 * MOV dst, {1,1,1,1}; 5121 * \return TRUE if valid, FALSE if invalid. 5122 */ 5123 static boolean 5124 is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, 5125 const struct tgsi_full_instruction *inst) 5126 { 5127 const unsigned unit = inst->Src[1].Register.Index; 5128 const unsigned target = inst->Texture.Texture; 5129 boolean valid = TRUE; 5130 5131 if (tgsi_is_shadow_target(target) && 5132 is_integer_type(emit->sampler_return_type[unit])) { 5133 debug_printf("Invalid SAMPLE_C with an integer texture!\n"); 5134 valid = FALSE; 5135 } 5136 /* XXX might check for other conditions in the future here */ 5137 5138 if (!valid) { 5139 /* emit a MOV dst, {1,1,1,1} instruction. 
*/ 5140 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 5141 begin_emit_instruction(emit); 5142 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 5143 emit_dst_register(emit, &inst->Dst[0]); 5144 emit_src_register(emit, &one); 5145 end_emit_instruction(emit); 5146 } 5147 5148 return valid; 5149 } 5150 5151 5152 /** 5153 * Emit code for TGSI_OPCODE_TEX (simple texture lookup) 5154 */ 5155 static boolean 5156 emit_tex(struct svga_shader_emitter_v10 *emit, 5157 const struct tgsi_full_instruction *inst) 5158 { 5159 const uint unit = inst->Src[1].Register.Index; 5160 unsigned target = inst->Texture.Texture; 5161 unsigned opcode; 5162 struct tgsi_full_src_register coord; 5163 int offsets[3]; 5164 struct tex_swizzle_info swz_info; 5165 5166 /* check that the sampler returns a float */ 5167 if (!is_valid_tex_instruction(emit, inst)) 5168 return TRUE; 5169 5170 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5171 5172 get_texel_offsets(emit, inst, offsets); 5173 5174 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5175 5176 /* SAMPLE dst, coord(s0), resource, sampler */ 5177 begin_emit_instruction(emit); 5178 5179 if (tgsi_is_shadow_target(target)) 5180 opcode = VGPU10_OPCODE_SAMPLE_C; 5181 else 5182 opcode = VGPU10_OPCODE_SAMPLE; 5183 5184 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5185 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5186 emit_src_register(emit, &coord); 5187 emit_resource_register(emit, unit); 5188 emit_sampler_register(emit, unit); 5189 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 5190 emit_tex_compare_refcoord(emit, target, &coord); 5191 } 5192 end_emit_instruction(emit); 5193 5194 end_tex_swizzle(emit, &swz_info); 5195 5196 free_temp_indexes(emit); 5197 5198 return TRUE; 5199 } 5200 5201 5202 /** 5203 * Emit code for TGSI_OPCODE_TXP (projective texture) 5204 */ 5205 static boolean 5206 emit_txp(struct svga_shader_emitter_v10 *emit, 5207 const struct tgsi_full_instruction *inst) 5208 { 5209 const uint unit = inst->Src[1].Register.Index; 5210 unsigned target = inst->Texture.Texture; 5211 unsigned opcode; 5212 int offsets[3]; 5213 unsigned tmp = get_temp_index(emit); 5214 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 5215 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 5216 struct tgsi_full_src_register src0_wwww = 5217 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5218 struct tgsi_full_src_register coord; 5219 struct tex_swizzle_info swz_info; 5220 5221 /* check that the sampler returns a float */ 5222 if (!is_valid_tex_instruction(emit, inst)) 5223 return TRUE; 5224 5225 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5226 5227 get_texel_offsets(emit, inst, offsets); 5228 5229 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5230 5231 /* DIV tmp, coord, coord.wwww */ 5232 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, 5233 &coord, &src0_wwww, FALSE); 5234 5235 /* SAMPLE dst, coord(tmp), resource, sampler */ 5236 begin_emit_instruction(emit); 5237 5238 if (tgsi_is_shadow_target(target)) 5239 opcode = VGPU10_OPCODE_SAMPLE_C; 5240 else 5241 opcode = VGPU10_OPCODE_SAMPLE; 5242 5243 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5244 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5245 emit_src_register(emit, &tmp_src); /* projected coord */ 5246 emit_resource_register(emit, unit); 5247 emit_sampler_register(emit, unit); 5248 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 5249 emit_tex_compare_refcoord(emit, target, &tmp_src); 5250 } 5251 
end_emit_instruction(emit); 5252 5253 end_tex_swizzle(emit, &swz_info); 5254 5255 free_temp_indexes(emit); 5256 5257 return TRUE; 5258 } 5259 5260 5261 /** 5262 * Emit code for TGSI_OPCODE_XPD instruction. 5263 */ 5264 static boolean 5265 emit_xpd(struct svga_shader_emitter_v10 *emit, 5266 const struct tgsi_full_instruction *inst) 5267 { 5268 /* dst.x = src0.y * src1.z - src1.y * src0.z 5269 * dst.y = src0.z * src1.x - src1.z * src0.x 5270 * dst.z = src0.x * src1.y - src1.x * src0.y 5271 * dst.w = 1 5272 */ 5273 struct tgsi_full_src_register s0_xxxx = 5274 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 5275 struct tgsi_full_src_register s0_yyyy = 5276 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 5277 struct tgsi_full_src_register s0_zzzz = 5278 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); 5279 5280 struct tgsi_full_src_register s1_xxxx = 5281 scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 5282 struct tgsi_full_src_register s1_yyyy = 5283 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 5284 struct tgsi_full_src_register s1_zzzz = 5285 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Z); 5286 5287 unsigned tmp1 = get_temp_index(emit); 5288 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 5289 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 5290 5291 unsigned tmp2 = get_temp_index(emit); 5292 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 5293 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 5294 struct tgsi_full_src_register neg_tmp2_src = negate_src(&tmp2_src); 5295 5296 unsigned tmp3 = get_temp_index(emit); 5297 struct tgsi_full_src_register tmp3_src = make_src_temp_reg(tmp3); 5298 struct tgsi_full_dst_register tmp3_dst = make_dst_temp_reg(tmp3); 5299 struct tgsi_full_dst_register tmp3_dst_x = 5300 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_X); 5301 struct tgsi_full_dst_register tmp3_dst_y = 5302 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Y); 5303 struct tgsi_full_dst_register tmp3_dst_z = 5304 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_Z); 5305 struct tgsi_full_dst_register tmp3_dst_w = 5306 writemask_dst(&tmp3_dst, TGSI_WRITEMASK_W); 5307 5308 /* Note: we put all the intermediate computations into tmp3 in case 5309 * the XPD dest register is the same as one of the src regs (in which 5310 * case we could clobber a src reg before we're done with it). 5311 * 5312 * Note: we could get by with just one temp register instead of three 5313 * since we're doing scalar operations and there's enough room in one 5314 * temp for everything.
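*
* Sanity check with hypothetical inputs: src0 = (1, 0, 0) and
* src1 = (0, 1, 0) give dst.xyz = (0, 0, 1), the expected cross product.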
5315 */ 5316 5317 /* MUL tmp1, src0.y, src1.z */ 5318 /* MUL tmp2, src1.y, src0.z */ 5319 /* ADD tmp3.x, tmp1, -tmp2 */ 5320 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 5321 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, 5322 &s0_yyyy, &s1_zzzz, FALSE); 5323 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, 5324 &s1_yyyy, &s0_zzzz, FALSE); 5325 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_x, 5326 &tmp1_src, &neg_tmp2_src, FALSE); 5327 } 5328 5329 /* MUL tmp1, src0.z, src1.x */ 5330 /* MUL tmp2, src1.z, src0.x */ 5331 /* ADD tmp3.y, tmp1, -tmp2 */ 5332 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 5333 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_zzzz, 5334 &s1_xxxx, FALSE); 5335 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_zzzz, 5336 &s0_xxxx, FALSE); 5337 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_y, 5338 &tmp1_src, &neg_tmp2_src, FALSE); 5339 } 5340 5341 /* MUL tmp1, src0.x, src1.y */ 5342 /* MUL tmp2, src1.x, src0.y */ 5343 /* ADD tmp3.z, tmp1, -tmp2 */ 5344 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 5345 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &s0_xxxx, 5346 &s1_yyyy, FALSE); 5347 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp2_dst, &s1_xxxx, 5348 &s0_yyyy, FALSE); 5349 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp3_dst_z, 5350 &tmp1_src, &neg_tmp2_src, FALSE); 5351 } 5352 5353 /* MOV tmp3.w, 1.0 */ 5354 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 5355 struct tgsi_full_src_register one = 5356 make_immediate_reg_float(emit, 1.0f); 5357 5358 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &tmp3_dst_w, &one, FALSE); 5359 } 5360 5361 /* MOV dst, tmp3 */ 5362 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &tmp3_src, 5363 inst->Instruction.Saturate); 5364 5365 5366 free_temp_indexes(emit); 5367 5368 return TRUE; 5369 } 5370 5371 5372 /** 5373 * Emit code for TGSI_OPCODE_TXD (explicit derivatives) 5374 */ 5375 static boolean 5376 emit_txd(struct svga_shader_emitter_v10 *emit, 5377 const struct tgsi_full_instruction *inst) 5378 { 5379 const uint unit = inst->Src[3].Register.Index; 5380 unsigned target = inst->Texture.Texture; 5381 int offsets[3]; 5382 struct tgsi_full_src_register coord; 5383 struct tex_swizzle_info swz_info; 5384 5385 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 5386 &swz_info); 5387 5388 get_texel_offsets(emit, inst, offsets); 5389 5390 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5391 5392 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ 5393 begin_emit_instruction(emit); 5394 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, 5395 inst->Instruction.Saturate, offsets); 5396 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5397 emit_src_register(emit, &coord); 5398 emit_resource_register(emit, unit); 5399 emit_sampler_register(emit, unit); 5400 emit_src_register(emit, &inst->Src[1]); /* Xderiv */ 5401 emit_src_register(emit, &inst->Src[2]); /* Yderiv */ 5402 end_emit_instruction(emit); 5403 5404 end_tex_swizzle(emit, &swz_info); 5405 5406 free_temp_indexes(emit); 5407 5408 return TRUE; 5409 } 5410 5411 5412 /** 5413 * Emit code for TGSI_OPCODE_TXF (texel fetch) 5414 */ 5415 static boolean 5416 emit_txf(struct svga_shader_emitter_v10 *emit, 5417 const struct tgsi_full_instruction *inst) 5418 { 5419 const uint unit = inst->Src[1].Register.Index; 5420 const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture); 5421 int offsets[3]; 5422 struct 
tex_swizzle_info swz_info; 5423 5424 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 5425 5426 get_texel_offsets(emit, inst, offsets); 5427 5428 if (msaa) { 5429 /* Fetch one sample from an MSAA texture */ 5430 struct tgsi_full_src_register sampleIndex = 5431 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5432 /* LD_MS dst, coord(s0), resource, sampleIndex */ 5433 begin_emit_instruction(emit); 5434 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, 5435 inst->Instruction.Saturate, offsets); 5436 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5437 emit_src_register(emit, &inst->Src[0]); 5438 emit_resource_register(emit, unit); 5439 emit_src_register(emit, &sampleIndex); 5440 end_emit_instruction(emit); 5441 } 5442 else { 5443 /* Fetch one texel specified by integer coordinate */ 5444 /* LD dst, coord(s0), resource */ 5445 begin_emit_instruction(emit); 5446 emit_sample_opcode(emit, VGPU10_OPCODE_LD, 5447 inst->Instruction.Saturate, offsets); 5448 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5449 emit_src_register(emit, &inst->Src[0]); 5450 emit_resource_register(emit, unit); 5451 end_emit_instruction(emit); 5452 } 5453 5454 end_tex_swizzle(emit, &swz_info); 5455 5456 free_temp_indexes(emit); 5457 5458 return TRUE; 5459 } 5460 5461 5462 /** 5463 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) 5464 * or TGSI_OPCODE_TXB2 (for cube shadow maps). 5465 */ 5466 static boolean 5467 emit_txl_txb(struct svga_shader_emitter_v10 *emit, 5468 const struct tgsi_full_instruction *inst) 5469 { 5470 unsigned target = inst->Texture.Texture; 5471 unsigned opcode, unit; 5472 int offsets[3]; 5473 struct tgsi_full_src_register coord, lod_bias; 5474 struct tex_swizzle_info swz_info; 5475 5476 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || 5477 inst->Instruction.Opcode == TGSI_OPCODE_TXB || 5478 inst->Instruction.Opcode == TGSI_OPCODE_TXB2); 5479 5480 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { 5481 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 5482 unit = inst->Src[2].Register.Index; 5483 } 5484 else { 5485 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 5486 unit = inst->Src[1].Register.Index; 5487 } 5488 5489 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 5490 &swz_info); 5491 5492 get_texel_offsets(emit, inst, offsets); 5493 5494 coord = setup_texcoord(emit, unit, &inst->Src[0]); 5495 5496 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ 5497 begin_emit_instruction(emit); 5498 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { 5499 opcode = VGPU10_OPCODE_SAMPLE_L; 5500 } 5501 else { 5502 opcode = VGPU10_OPCODE_SAMPLE_B; 5503 } 5504 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 5505 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 5506 emit_src_register(emit, &coord); 5507 emit_resource_register(emit, unit); 5508 emit_sampler_register(emit, unit); 5509 emit_src_register(emit, &lod_bias); 5510 end_emit_instruction(emit); 5511 5512 end_tex_swizzle(emit, &swz_info); 5513 5514 free_temp_indexes(emit); 5515 5516 return TRUE; 5517 } 5518 5519 5520 /** 5521 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. 
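 *
 * For most sampler targets this maps onto a single query instruction,
 * conceptually:
 *   RESINFO dst, srcMipLevel, resource[unit]
 * Texture buffers are the exception: RESINFO cannot query them, so their
 * sizes are stored in shader constants and simply copied with a MOV
 * (see the TGSI_TEXTURE_BUFFER case below).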
5522 */ 5523 static boolean 5524 emit_txq(struct svga_shader_emitter_v10 *emit, 5525 const struct tgsi_full_instruction *inst) 5526 { 5527 const uint unit = inst->Src[1].Register.Index; 5528 5529 if (emit->sampler_target[unit] == TGSI_TEXTURE_BUFFER) { 5530 /* RESINFO does not support querying texture buffers, so we instead 5531 * store texture buffer sizes in shader constants, then copy them to 5532 * implement TXQ instead of emitting RESINFO. 5533 * MOV dst, const[texture_buffer_size_index[unit]] 5534 */ 5535 struct tgsi_full_src_register size_src = 5536 make_src_const_reg(emit->texture_buffer_size_index[unit]); 5537 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src, 5538 FALSE); 5539 } else { 5540 /* RESINFO dst, srcMipLevel, resource */ 5541 begin_emit_instruction(emit); 5542 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 5543 emit_dst_register(emit, &inst->Dst[0]); 5544 emit_src_register(emit, &inst->Src[0]); 5545 emit_resource_register(emit, unit); 5546 end_emit_instruction(emit); 5547 } 5548 5549 free_temp_indexes(emit); 5550 5551 return TRUE; 5552 } 5553 5554 5555 /** 5556 * Emit a simple instruction (like ADD, MUL, MIN, etc). 5557 */ 5558 static boolean 5559 emit_simple(struct svga_shader_emitter_v10 *emit, 5560 const struct tgsi_full_instruction *inst) 5561 { 5562 const unsigned opcode = inst->Instruction.Opcode; 5563 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5564 unsigned i; 5565 5566 begin_emit_instruction(emit); 5567 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5568 inst->Instruction.Saturate); 5569 for (i = 0; i < op->num_dst; i++) { 5570 emit_dst_register(emit, &inst->Dst[i]); 5571 } 5572 for (i = 0; i < op->num_src; i++) { 5573 emit_src_register(emit, &inst->Src[i]); 5574 } 5575 end_emit_instruction(emit); 5576 5577 return TRUE; 5578 } 5579 5580 5581 /** 5582 * We only special case the MOV instruction to try to detect constant 5583 * color writes in the fragment shader. 5584 */ 5585 static boolean 5586 emit_mov(struct svga_shader_emitter_v10 *emit, 5587 const struct tgsi_full_instruction *inst) 5588 { 5589 const struct tgsi_full_src_register *src = &inst->Src[0]; 5590 const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 5591 5592 if (emit->unit == PIPE_SHADER_FRAGMENT && 5593 dst->Register.File == TGSI_FILE_OUTPUT && 5594 dst->Register.Index == 0 && 5595 src->Register.File == TGSI_FILE_CONSTANT && 5596 !src->Register.Indirect) { 5597 emit->constant_color_output = TRUE; 5598 } 5599 5600 return emit_simple(emit, inst); 5601 } 5602 5603 5604 /** 5605 * Emit a simple VGPU10 instruction which writes to multiple dest registers, 5606 * where TGSI only uses one dest register. 
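 *
 * For example (a sketch, not an exact token dump): TGSI UDIV has a single
 * destination (the quotient) while the corresponding VGPU10 instruction
 * writes two, so we emit roughly
 *   UDIV dst, null, src0, src1
 * with a null register filling the unused destination slot.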
5607 */ 5608 static boolean 5609 emit_simple_1dst(struct svga_shader_emitter_v10 *emit, 5610 const struct tgsi_full_instruction *inst, 5611 unsigned dst_count, 5612 unsigned dst_index) 5613 { 5614 const unsigned opcode = inst->Instruction.Opcode; 5615 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 5616 unsigned i; 5617 5618 begin_emit_instruction(emit); 5619 emit_opcode(emit, translate_opcode(inst->Instruction.Opcode), 5620 inst->Instruction.Saturate); 5621 5622 for (i = 0; i < dst_count; i++) { 5623 if (i == dst_index) { 5624 emit_dst_register(emit, &inst->Dst[0]); 5625 } else { 5626 emit_null_dst_register(emit); 5627 } 5628 } 5629 5630 for (i = 0; i < op->num_src; i++) { 5631 emit_src_register(emit, &inst->Src[i]); 5632 } 5633 end_emit_instruction(emit); 5634 5635 return TRUE; 5636 } 5637 5638 5639 /** 5640 * Translate a single TGSI instruction to VGPU10. 5641 */ 5642 static boolean 5643 emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, 5644 unsigned inst_number, 5645 const struct tgsi_full_instruction *inst) 5646 { 5647 const unsigned opcode = inst->Instruction.Opcode; 5648 5649 switch (opcode) { 5650 case TGSI_OPCODE_ADD: 5651 case TGSI_OPCODE_AND: 5652 case TGSI_OPCODE_BGNLOOP: 5653 case TGSI_OPCODE_BRK: 5654 case TGSI_OPCODE_CEIL: 5655 case TGSI_OPCODE_CONT: 5656 case TGSI_OPCODE_DDX: 5657 case TGSI_OPCODE_DDY: 5658 case TGSI_OPCODE_DIV: 5659 case TGSI_OPCODE_DP2: 5660 case TGSI_OPCODE_DP3: 5661 case TGSI_OPCODE_DP4: 5662 case TGSI_OPCODE_ELSE: 5663 case TGSI_OPCODE_ENDIF: 5664 case TGSI_OPCODE_ENDLOOP: 5665 case TGSI_OPCODE_ENDSUB: 5666 case TGSI_OPCODE_F2I: 5667 case TGSI_OPCODE_F2U: 5668 case TGSI_OPCODE_FLR: 5669 case TGSI_OPCODE_FRC: 5670 case TGSI_OPCODE_FSEQ: 5671 case TGSI_OPCODE_FSGE: 5672 case TGSI_OPCODE_FSLT: 5673 case TGSI_OPCODE_FSNE: 5674 case TGSI_OPCODE_I2F: 5675 case TGSI_OPCODE_IMAX: 5676 case TGSI_OPCODE_IMIN: 5677 case TGSI_OPCODE_INEG: 5678 case TGSI_OPCODE_ISGE: 5679 case TGSI_OPCODE_ISHR: 5680 case TGSI_OPCODE_ISLT: 5681 case TGSI_OPCODE_MAD: 5682 case TGSI_OPCODE_MAX: 5683 case TGSI_OPCODE_MIN: 5684 case TGSI_OPCODE_MUL: 5685 case TGSI_OPCODE_NOP: 5686 case TGSI_OPCODE_NOT: 5687 case TGSI_OPCODE_OR: 5688 case TGSI_OPCODE_RET: 5689 case TGSI_OPCODE_UADD: 5690 case TGSI_OPCODE_USEQ: 5691 case TGSI_OPCODE_USGE: 5692 case TGSI_OPCODE_USLT: 5693 case TGSI_OPCODE_UMIN: 5694 case TGSI_OPCODE_UMAD: 5695 case TGSI_OPCODE_UMAX: 5696 case TGSI_OPCODE_ROUND: 5697 case TGSI_OPCODE_SQRT: 5698 case TGSI_OPCODE_SHL: 5699 case TGSI_OPCODE_TRUNC: 5700 case TGSI_OPCODE_U2F: 5701 case TGSI_OPCODE_UCMP: 5702 case TGSI_OPCODE_USHR: 5703 case TGSI_OPCODE_USNE: 5704 case TGSI_OPCODE_XOR: 5705 /* simple instructions */ 5706 return emit_simple(emit, inst); 5707 5708 case TGSI_OPCODE_MOV: 5709 return emit_mov(emit, inst); 5710 case TGSI_OPCODE_EMIT: 5711 return emit_vertex(emit, inst); 5712 case TGSI_OPCODE_ENDPRIM: 5713 return emit_endprim(emit, inst); 5714 case TGSI_OPCODE_IABS: 5715 return emit_iabs(emit, inst); 5716 case TGSI_OPCODE_ARL: 5717 /* fall-through */ 5718 case TGSI_OPCODE_UARL: 5719 return emit_arl_uarl(emit, inst); 5720 case TGSI_OPCODE_BGNSUB: 5721 /* no-op */ 5722 return TRUE; 5723 case TGSI_OPCODE_CAL: 5724 return emit_cal(emit, inst); 5725 case TGSI_OPCODE_CMP: 5726 return emit_cmp(emit, inst); 5727 case TGSI_OPCODE_COS: 5728 return emit_sincos(emit, inst); 5729 case TGSI_OPCODE_DP2A: 5730 return emit_dp2a(emit, inst); 5731 case TGSI_OPCODE_DPH: 5732 return emit_dph(emit, inst); 5733 case TGSI_OPCODE_DST: 5734 return emit_dst(emit, 
inst); 5735 case TGSI_OPCODE_EX2: 5736 return emit_ex2(emit, inst); 5737 case TGSI_OPCODE_EXP: 5738 return emit_exp(emit, inst); 5739 case TGSI_OPCODE_IF: 5740 return emit_if(emit, inst); 5741 case TGSI_OPCODE_KILL: 5742 return emit_kill(emit, inst); 5743 case TGSI_OPCODE_KILL_IF: 5744 return emit_kill_if(emit, inst); 5745 case TGSI_OPCODE_LG2: 5746 return emit_lg2(emit, inst); 5747 case TGSI_OPCODE_LIT: 5748 return emit_lit(emit, inst); 5749 case TGSI_OPCODE_LOG: 5750 return emit_log(emit, inst); 5751 case TGSI_OPCODE_LRP: 5752 return emit_lrp(emit, inst); 5753 case TGSI_OPCODE_POW: 5754 return emit_pow(emit, inst); 5755 case TGSI_OPCODE_RCP: 5756 return emit_rcp(emit, inst); 5757 case TGSI_OPCODE_RSQ: 5758 return emit_rsq(emit, inst); 5759 case TGSI_OPCODE_SAMPLE: 5760 return emit_sample(emit, inst); 5761 case TGSI_OPCODE_SCS: 5762 return emit_scs(emit, inst); 5763 case TGSI_OPCODE_SEQ: 5764 return emit_seq(emit, inst); 5765 case TGSI_OPCODE_SGE: 5766 return emit_sge(emit, inst); 5767 case TGSI_OPCODE_SGT: 5768 return emit_sgt(emit, inst); 5769 case TGSI_OPCODE_SIN: 5770 return emit_sincos(emit, inst); 5771 case TGSI_OPCODE_SLE: 5772 return emit_sle(emit, inst); 5773 case TGSI_OPCODE_SLT: 5774 return emit_slt(emit, inst); 5775 case TGSI_OPCODE_SNE: 5776 return emit_sne(emit, inst); 5777 case TGSI_OPCODE_SSG: 5778 return emit_ssg(emit, inst); 5779 case TGSI_OPCODE_ISSG: 5780 return emit_issg(emit, inst); 5781 case TGSI_OPCODE_TEX: 5782 return emit_tex(emit, inst); 5783 case TGSI_OPCODE_TXP: 5784 return emit_txp(emit, inst); 5785 case TGSI_OPCODE_TXB: 5786 case TGSI_OPCODE_TXB2: 5787 case TGSI_OPCODE_TXL: 5788 return emit_txl_txb(emit, inst); 5789 case TGSI_OPCODE_TXD: 5790 return emit_txd(emit, inst); 5791 case TGSI_OPCODE_TXF: 5792 return emit_txf(emit, inst); 5793 case TGSI_OPCODE_TXQ: 5794 return emit_txq(emit, inst); 5795 case TGSI_OPCODE_UIF: 5796 return emit_if(emit, inst); 5797 case TGSI_OPCODE_XPD: 5798 return emit_xpd(emit, inst); 5799 case TGSI_OPCODE_UMUL_HI: 5800 case TGSI_OPCODE_IMUL_HI: 5801 case TGSI_OPCODE_UDIV: 5802 case TGSI_OPCODE_IDIV: 5803 /* These cases use only the FIRST of two destination registers */ 5804 return emit_simple_1dst(emit, inst, 2, 0); 5805 case TGSI_OPCODE_UMUL: 5806 case TGSI_OPCODE_UMOD: 5807 case TGSI_OPCODE_MOD: 5808 /* These cases use only the SECOND of two destination registers */ 5809 return emit_simple_1dst(emit, inst, 2, 1); 5810 case TGSI_OPCODE_END: 5811 if (!emit_post_helpers(emit)) 5812 return FALSE; 5813 return emit_simple(emit, inst); 5814 5815 default: 5816 debug_printf("Unimplemented tgsi instruction %s\n", 5817 tgsi_get_opcode_name(opcode)); 5818 return FALSE; 5819 } 5820 5821 return TRUE; 5822 } 5823 5824 5825 /** 5826 * Emit the extra instructions to adjust the vertex position. 5827 * There are two possible adjustments: 5828 * 1. Converting from Gallium to VGPU10 coordinate space by applying the 5829 * "prescale" and "pretranslate" values. 5830 * 2. Undoing the viewport transformation when we use the swtnl/draw path. 5831 * \param vs_pos_tmp_index which temporary register contains the vertex pos. 
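 *
 * As an illustration of case 1, with typical prescale constants supplied
 * by the driver, mapping Gallium's clip-space Z range of [-1,1] onto
 * VGPU10's [0,1] corresponds to scale.z = 0.5 and trans.z = 0.5 in the
 * q = p * scale + p.w * trans computation performed below.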
5832 */ 5833 static void 5834 emit_vpos_instructions(struct svga_shader_emitter_v10 *emit, 5835 unsigned vs_pos_tmp_index) 5836 { 5837 struct tgsi_full_src_register tmp_pos_src; 5838 struct tgsi_full_dst_register pos_dst; 5839 5840 /* Don't bother to emit any extra vertex instructions if vertex position is 5841 * not written out 5842 */ 5843 if (emit->vposition.out_index == INVALID_INDEX) 5844 return; 5845 5846 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); 5847 pos_dst = make_dst_output_reg(emit->vposition.out_index); 5848 5849 /* If non-adjusted vertex position register index 5850 * is valid, copy the vertex position from the temporary 5851 * vertex position register before it is modified by the 5852 * prescale computation. 5853 */ 5854 if (emit->vposition.so_index != INVALID_INDEX) { 5855 struct tgsi_full_dst_register pos_so_dst = 5856 make_dst_output_reg(emit->vposition.so_index); 5857 5858 /* MOV pos_so, tmp_pos */ 5859 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, 5860 &tmp_pos_src, FALSE); 5861 } 5862 5863 if (emit->vposition.need_prescale) { 5864 /* This code adjusts the vertex position to match the VGPU10 convention. 5865 * If p is the position computed by the shader (usually by applying the 5866 * modelview and projection matrices), the new position q is computed by: 5867 * 5868 * q.x = p.w * trans.x + p.x * scale.x 5869 * q.y = p.w * trans.y + p.y * scale.y 5870 * q.z = p.w * trans.z + p.z * scale.z; 5871 * q.w = p.w * trans.w + p.w; 5872 */ 5873 struct tgsi_full_src_register tmp_pos_src_w = 5874 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5875 struct tgsi_full_dst_register tmp_pos_dst = 5876 make_dst_temp_reg(vs_pos_tmp_index); 5877 struct tgsi_full_dst_register tmp_pos_dst_xyz = 5878 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); 5879 5880 struct tgsi_full_src_register prescale_scale = 5881 make_src_const_reg(emit->vposition.prescale_scale_index); 5882 struct tgsi_full_src_register prescale_trans = 5883 make_src_const_reg(emit->vposition.prescale_trans_index); 5884 5885 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ 5886 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, 5887 &tmp_pos_src, &prescale_scale, FALSE); 5888 5889 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ 5890 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, 5891 &prescale_trans, &tmp_pos_src, FALSE); 5892 } 5893 else if (emit->key.vs.undo_viewport) { 5894 /* This code computes the final vertex position from the temporary 5895 * vertex position by undoing the viewport transformation and the 5896 * divide-by-W operation (we convert window coords back to clip coords). 5897 * This is needed when we use the 'draw' module for fallbacks. 
5898 * If p is the temp pos in window coords, then the NDC coord q is: 5899 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w 5900 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w 5901 * q.z = p.z * p.w 5902 * q.w = p.w 5903 * CONST[vs_viewport_index] contains: 5904 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } 5905 */ 5906 struct tgsi_full_dst_register tmp_pos_dst = 5907 make_dst_temp_reg(vs_pos_tmp_index); 5908 struct tgsi_full_dst_register tmp_pos_dst_xy = 5909 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); 5910 struct tgsi_full_src_register tmp_pos_src_wwww = 5911 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 5912 5913 struct tgsi_full_dst_register pos_dst_xyz = 5914 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); 5915 struct tgsi_full_dst_register pos_dst_w = 5916 writemask_dst(&pos_dst, TGSI_WRITEMASK_W); 5917 5918 struct tgsi_full_src_register vp_xyzw = 5919 make_src_const_reg(emit->vs.viewport_index); 5920 struct tgsi_full_src_register vp_zwww = 5921 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, 5922 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 5923 5924 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ 5925 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, 5926 &tmp_pos_src, &vp_zwww, FALSE); 5927 5928 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ 5929 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, 5930 &tmp_pos_src, &vp_xyzw, FALSE); 5931 5932 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ 5933 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, 5934 &tmp_pos_src, &tmp_pos_src_wwww, FALSE); 5935 5936 /* MOV pos.w, tmp_pos.w */ 5937 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, 5938 &tmp_pos_src, FALSE); 5939 } 5940 else if (vs_pos_tmp_index != INVALID_INDEX) { 5941 /* This code is to handle the case where the temporary vertex 5942 * position register is created when the vertex shader has stream 5943 * output and prescale is disabled because rasterization is to be 5944 * discarded. 5945 */ 5946 struct tgsi_full_dst_register pos_dst = 5947 make_dst_output_reg(emit->vposition.out_index); 5948 5949 /* MOV pos, tmp_pos */ 5950 begin_emit_instruction(emit); 5951 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 5952 emit_dst_register(emit, &pos_dst); 5953 emit_src_register(emit, &tmp_pos_src); 5954 end_emit_instruction(emit); 5955 } 5956 } 5957 5958 static void 5959 emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) 5960 { 5961 if (emit->clip_mode == CLIP_DISTANCE) { 5962 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ 5963 emit_clip_distance_instructions(emit); 5964 5965 } else if (emit->clip_mode == CLIP_VERTEX) { 5966 /* Convert TGSI CLIPVERTEX to CLIPDIST */ 5967 emit_clip_vertex_instructions(emit); 5968 } 5969 5970 /** 5971 * Emit vertex position and take care of legacy user planes only if 5972 * there is a valid vertex position register index. 5973 * This is to take care of the case 5974 * where the shader doesn't output vertex position. Then in 5975 * this case, don't bother to emit more vertex instructions. 5976 */ 5977 if (emit->vposition.out_index == INVALID_INDEX) 5978 return; 5979 5980 /** 5981 * Emit per-vertex clipping instructions for legacy user defined clip planes. 5982 * NOTE: we must emit the clip distance instructions before the 5983 * emit_vpos_instructions() call since the later function will change 5984 * the TEMP[vs_pos_tmp_index] value. 
5985 */ 5986 if (emit->clip_mode == CLIP_LEGACY) { 5987 /* Emit CLIPDIST for legacy user defined clip planes */ 5988 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); 5989 } 5990 } 5991 5992 5993 /** 5994 * Emit extra per-vertex instructions. This includes clip-coordinate 5995 * space conversion and computing clip distances. This is called for 5996 * each GS emit-vertex instruction and at the end of VS translation. 5997 */ 5998 static void 5999 emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) 6000 { 6001 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; 6002 6003 /* Emit clipping instructions based on clipping mode */ 6004 emit_clipping_instructions(emit); 6005 6006 /** 6007 * Reset the temporary vertex position register index 6008 * so that emit_dst_register() will use the real vertex position output 6009 */ 6010 emit->vposition.tmp_index = INVALID_INDEX; 6011 6012 /* Emit vertex position instructions */ 6013 emit_vpos_instructions(emit, vs_pos_tmp_index); 6014 6015 /* Restore original vposition.tmp_index value for the next GS vertex. 6016 * It doesn't matter for VS. 6017 */ 6018 emit->vposition.tmp_index = vs_pos_tmp_index; 6019 } 6020 6021 /** 6022 * Translate the TGSI_OPCODE_EMIT GS instruction. 6023 */ 6024 static boolean 6025 emit_vertex(struct svga_shader_emitter_v10 *emit, 6026 const struct tgsi_full_instruction *inst) 6027 { 6028 unsigned ret = TRUE; 6029 6030 assert(emit->unit == PIPE_SHADER_GEOMETRY); 6031 6032 emit_vertex_instructions(emit); 6033 6034 /* We can't use emit_simple() because the TGSI instruction has one 6035 * operand (vertex stream number) which we must ignore for VGPU10. 6036 */ 6037 begin_emit_instruction(emit); 6038 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); 6039 end_emit_instruction(emit); 6040 6041 return ret; 6042 } 6043 6044 6045 /** 6046 * Emit the extra code to convert from VGPU10's boolean front-face 6047 * register to TGSI's signed front-face register. 6048 * 6049 * TODO: Make temporary front-face register a scalar. 6050 */ 6051 static void 6052 emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) 6053 { 6054 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6055 6056 if (emit->fs.face_input_index != INVALID_INDEX) { 6057 /* convert vgpu10 boolean face register to gallium +/-1 value */ 6058 struct tgsi_full_dst_register tmp_dst = 6059 make_dst_temp_reg(emit->fs.face_tmp_index); 6060 struct tgsi_full_src_register one = 6061 make_immediate_reg_float(emit, 1.0f); 6062 struct tgsi_full_src_register neg_one = 6063 make_immediate_reg_float(emit, -1.0f); 6064 6065 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ 6066 begin_emit_instruction(emit); 6067 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); 6068 emit_dst_register(emit, &tmp_dst); 6069 emit_face_register(emit); 6070 emit_src_register(emit, &one); 6071 emit_src_register(emit, &neg_one); 6072 end_emit_instruction(emit); 6073 } 6074 } 6075 6076 6077 /** 6078 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w. 
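 *
 * TGSI fragment shaders expect the position input's W component to hold
 * the reciprocal 1/w (the gl_FragCoord.w convention), while the VGPU10
 * position register provides w itself, so xyz is copied through and only
 * w gets a reciprocal, roughly:
 *   MOV fragcoord_tmp.xyz, fragcoord.xyz
 *   DIV fragcoord_tmp.w, 1.0, fragcoord.w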
6079 */ 6080 static void 6081 emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) 6082 { 6083 assert(emit->unit == PIPE_SHADER_FRAGMENT); 6084 6085 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 6086 struct tgsi_full_dst_register tmp_dst = 6087 make_dst_temp_reg(emit->fs.fragcoord_tmp_index); 6088 struct tgsi_full_dst_register tmp_dst_xyz = 6089 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ); 6090 struct tgsi_full_dst_register tmp_dst_w = 6091 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 6092 struct tgsi_full_src_register one = 6093 make_immediate_reg_float(emit, 1.0f); 6094 struct tgsi_full_src_register fragcoord = 6095 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index); 6096 6097 /* save the input index */ 6098 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index; 6099 /* set to invalid to prevent substitution in emit_src_register() */ 6100 emit->fs.fragcoord_input_index = INVALID_INDEX; 6101 6102 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ 6103 begin_emit_instruction(emit); 6104 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 6105 emit_dst_register(emit, &tmp_dst_xyz); 6106 emit_src_register(emit, &fragcoord); 6107 end_emit_instruction(emit); 6108 6109 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ 6110 begin_emit_instruction(emit); 6111 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); 6112 emit_dst_register(emit, &tmp_dst_w); 6113 emit_src_register(emit, &one); 6114 emit_src_register(emit, &fragcoord); 6115 end_emit_instruction(emit); 6116 6117 /* restore saved value */ 6118 emit->fs.fragcoord_input_index = fragcoord_input_index; 6119 } 6120 } 6121 6122 6123 /** 6124 * Emit extra instructions to adjust VS inputs/attributes. This can 6125 * mean casting a vertex attribute from int to float or setting the 6126 * W component to 1, or both. 6127 */ 6128 static void 6129 emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) 6130 { 6131 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1; 6132 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof; 6133 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof; 6134 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra; 6135 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm; 6136 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled; 6137 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled; 6138 6139 unsigned adjust_mask = (save_w_1_mask | 6140 save_itof_mask | 6141 save_utof_mask | 6142 save_is_bgra_mask | 6143 save_puint_to_snorm_mask | 6144 save_puint_to_uscaled_mask | 6145 save_puint_to_sscaled_mask); 6146 6147 assert(emit->unit == PIPE_SHADER_VERTEX); 6148 6149 if (adjust_mask) { 6150 struct tgsi_full_src_register one = 6151 make_immediate_reg_float(emit, 1.0f); 6152 6153 struct tgsi_full_src_register one_int = 6154 make_immediate_reg_int(emit, 1); 6155 6156 /* We need to turn off these bitmasks while emitting the 6157 * instructions below, then restore them afterward. 
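    * Otherwise emit_src_register() would substitute the adjusted
    * temporary for INPUT[index] while we are still emitting the very
    * instructions that initialize that temporary, and the raw vertex
    * attribute would never be read.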
6158 */ 6159 emit->key.vs.adjust_attrib_w_1 = 0; 6160 emit->key.vs.adjust_attrib_itof = 0; 6161 emit->key.vs.adjust_attrib_utof = 0; 6162 emit->key.vs.attrib_is_bgra = 0; 6163 emit->key.vs.attrib_puint_to_snorm = 0; 6164 emit->key.vs.attrib_puint_to_uscaled = 0; 6165 emit->key.vs.attrib_puint_to_sscaled = 0; 6166 6167 while (adjust_mask) { 6168 unsigned index = u_bit_scan(&adjust_mask); 6169 6170 /* skip the instruction if this vertex attribute is not being used */ 6171 if (emit->info.input_usage_mask[index] == 0) 6172 continue; 6173 6174 unsigned tmp = emit->vs.adjusted_input[index]; 6175 struct tgsi_full_src_register input_src = 6176 make_src_reg(TGSI_FILE_INPUT, index); 6177 6178 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6179 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6180 struct tgsi_full_dst_register tmp_dst_w = 6181 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 6182 6183 /* ITOF/UTOF/MOV tmp, input[index] */ 6184 if (save_itof_mask & (1 << index)) { 6185 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, 6186 &tmp_dst, &input_src, FALSE); 6187 } 6188 else if (save_utof_mask & (1 << index)) { 6189 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF, 6190 &tmp_dst, &input_src, FALSE); 6191 } 6192 else if (save_puint_to_snorm_mask & (1 << index)) { 6193 emit_puint_to_snorm(emit, &tmp_dst, &input_src); 6194 } 6195 else if (save_puint_to_uscaled_mask & (1 << index)) { 6196 emit_puint_to_uscaled(emit, &tmp_dst, &input_src); 6197 } 6198 else if (save_puint_to_sscaled_mask & (1 << index)) { 6199 emit_puint_to_sscaled(emit, &tmp_dst, &input_src); 6200 } 6201 else { 6202 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index)); 6203 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6204 &tmp_dst, &input_src, FALSE); 6205 } 6206 6207 if (save_is_bgra_mask & (1 << index)) { 6208 emit_swap_r_b(emit, &tmp_dst, &tmp_src); 6209 } 6210 6211 if (save_w_1_mask & (1 << index)) { 6212 /* MOV tmp.w, 1.0 */ 6213 if (emit->key.vs.attrib_is_pure_int & (1 << index)) { 6214 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6215 &tmp_dst_w, &one_int, FALSE); 6216 } 6217 else { 6218 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6219 &tmp_dst_w, &one, FALSE); 6220 } 6221 } 6222 } 6223 6224 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask; 6225 emit->key.vs.adjust_attrib_itof = save_itof_mask; 6226 emit->key.vs.adjust_attrib_utof = save_utof_mask; 6227 emit->key.vs.attrib_is_bgra = save_is_bgra_mask; 6228 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask; 6229 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask; 6230 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask; 6231 } 6232 } 6233 6234 6235 /** 6236 * Some common values like 0.0, 1.0, 0.5, etc. are frequently needed 6237 * to implement some instructions. We pre-allocate those values here 6238 * in the immediate constant buffer. 
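 *
 * For example, the first pre-allocated vector below is {0.0, 1.0, 0.5, -1.0},
 * so an instruction that later needs the constant 1.0 can simply select the
 * .y component of that immediate instead of adding a new entry.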
6239 */ 6240 static void 6241 alloc_common_immediates(struct svga_shader_emitter_v10 *emit) 6242 { 6243 unsigned n = 0; 6244 6245 emit->common_immediate_pos[n++] = 6246 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); 6247 6248 emit->common_immediate_pos[n++] = 6249 alloc_immediate_float4(emit, 128.0f, -128.0f, 2.0f, 3.0f); 6250 6251 emit->common_immediate_pos[n++] = 6252 alloc_immediate_int4(emit, 0, 1, 0, -1); 6253 6254 if (emit->key.vs.attrib_puint_to_snorm) { 6255 emit->common_immediate_pos[n++] = 6256 alloc_immediate_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 6257 } 6258 6259 if (emit->key.vs.attrib_puint_to_uscaled) { 6260 emit->common_immediate_pos[n++] = 6261 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); 6262 } 6263 6264 if (emit->key.vs.attrib_puint_to_sscaled) { 6265 emit->common_immediate_pos[n++] = 6266 alloc_immediate_int4(emit, 22, 12, 2, 0); 6267 6268 emit->common_immediate_pos[n++] = 6269 alloc_immediate_int4(emit, 22, 30, 0, 0); 6270 } 6271 6272 assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); 6273 emit->num_common_immediates = n; 6274 } 6275 6276 6277 /** 6278 * Emit any extra/helper declarations/code that we might need between 6279 * the declaration section and code section. 6280 */ 6281 static boolean 6282 emit_pre_helpers(struct svga_shader_emitter_v10 *emit) 6283 { 6284 /* Properties */ 6285 if (emit->unit == PIPE_SHADER_GEOMETRY) 6286 emit_property_instructions(emit); 6287 6288 /* Declare inputs */ 6289 if (!emit_input_declarations(emit)) 6290 return FALSE; 6291 6292 /* Declare outputs */ 6293 if (!emit_output_declarations(emit)) 6294 return FALSE; 6295 6296 /* Declare temporary registers */ 6297 emit_temporaries_declaration(emit); 6298 6299 /* Declare constant registers */ 6300 emit_constant_declaration(emit); 6301 6302 /* Declare samplers and resources */ 6303 emit_sampler_declarations(emit); 6304 emit_resource_declarations(emit); 6305 6306 /* Declare clip distance output registers */ 6307 if (emit->unit == PIPE_SHADER_VERTEX || 6308 emit->unit == PIPE_SHADER_GEOMETRY) { 6309 emit_clip_distance_declarations(emit); 6310 } 6311 6312 alloc_common_immediates(emit); 6313 6314 if (emit->unit == PIPE_SHADER_FRAGMENT && 6315 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 6316 float alpha = emit->key.fs.alpha_ref; 6317 emit->fs.alpha_ref_index = 6318 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); 6319 } 6320 6321 /* Now, emit the constant block containing all the immediates 6322 * declared by shader, as well as the extra ones seen above. 6323 */ 6324 emit_vgpu10_immediates_block(emit); 6325 6326 if (emit->unit == PIPE_SHADER_FRAGMENT) { 6327 emit_frontface_instructions(emit); 6328 emit_fragcoord_instructions(emit); 6329 } 6330 else if (emit->unit == PIPE_SHADER_VERTEX) { 6331 emit_vertex_attrib_instructions(emit); 6332 } 6333 6334 return TRUE; 6335 } 6336 6337 6338 /** 6339 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w 6340 * against the alpha reference value and discards the fragment if the 6341 * comparison fails. 
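 *
 * For example, with alpha_func == SVGA3D_CMP_GREATER the emitted code is
 * conceptually:
 *   tmp.x = (color.w > alpha_ref) ? ~0 : 0
 *   discard the fragment if tmp.x == 0
 * followed by the final color write when no broadcast pass is needed.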
*/
static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
                             unsigned fs_color_tmp_index)
{
   /* compare output color's alpha to alpha ref and kill */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src_x =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register color_src =
      make_src_temp_reg(fs_color_tmp_index);
   struct tgsi_full_src_register color_src_w =
      scalar_src(&color_src, TGSI_SWIZZLE_W);
   struct tgsi_full_src_register ref_src =
      make_src_immediate_reg(emit->fs.alpha_ref_index);
   struct tgsi_full_dst_register color_dst =
      make_dst_output_reg(emit->fs.color_out_index[0]);

   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   /* dst = src0 'alpha_func' src1 */
   emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
                   &color_src_w, &ref_src);

   /* DISCARD if dst.x == 0 */
   begin_emit_instruction(emit);
   emit_discard_opcode(emit, FALSE);  /* discard if src0.x is zero */
   emit_src_register(emit, &tmp_src_x);
   end_emit_instruction(emit);

   /* If we don't need to broadcast the color below or set fragments to
    * white, emit the final color here.
    */
   if (emit->key.fs.write_color0_to_n_cbufs <= 1 &&
       !emit->key.fs.white_fragments) {
      /* MOV output.color, tempcolor */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
                           &color_src, FALSE); /* XXX saturate? */
   }

   free_temp_indexes(emit);
}


/**
 * When we need to emit white for all fragments (for emulating XOR logicop
 * mode), this function copies white into the temporary color output register.
 */
static void
emit_set_color_white(struct svga_shader_emitter_v10 *emit,
                     unsigned fs_color_tmp_index)
{
   struct tgsi_full_dst_register color_dst =
      make_dst_temp_reg(fs_color_tmp_index);
   struct tgsi_full_src_register white =
      make_immediate_reg_float(emit, 1.0f);

   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &white, FALSE);
}


/**
 * Emit instructions for writing a single color output to multiple
 * color buffers.
 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS property
 * is set and the number of render targets is greater than one, or when
 * key.fs.white_fragments is true.
 * \param fs_color_tmp_index  index of the temp register that holds the
 *                            color to broadcast.
 */
static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
                                  unsigned fs_color_tmp_index)
{
   const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
   unsigned i;
   struct tgsi_full_src_register color_src =
      make_src_temp_reg(fs_color_tmp_index);

   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   for (i = 0; i < n; i++) {
      unsigned output_reg = emit->fs.color_out_index[i];
      struct tgsi_full_dst_register color_dst =
         make_dst_output_reg(output_reg);

      /* Fill in this semantic here since we'll use it later in
       * emit_dst_register().
       */
      emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;

      /* MOV output.color[i], tempcolor */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst,
                           &color_src, FALSE); /* XXX saturate?
*/ 6438 } 6439 } 6440 6441 6442 /** 6443 * Emit extra helper code after the original shader code, but before the 6444 * last END/RET instruction. 6445 * For vertex shaders this means emitting the extra code to apply the 6446 * prescale scale/translation. 6447 */ 6448 static boolean 6449 emit_post_helpers(struct svga_shader_emitter_v10 *emit) 6450 { 6451 if (emit->unit == PIPE_SHADER_VERTEX) { 6452 emit_vertex_instructions(emit); 6453 } 6454 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 6455 const unsigned fs_color_tmp_index = emit->fs.color_tmp_index; 6456 6457 /* We no longer want emit_dst_register() to substitute the 6458 * temporary fragment color register for the real color output. 6459 */ 6460 emit->fs.color_tmp_index = INVALID_INDEX; 6461 6462 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 6463 emit_alpha_test_instructions(emit, fs_color_tmp_index); 6464 } 6465 if (emit->key.fs.white_fragments) { 6466 emit_set_color_white(emit, fs_color_tmp_index); 6467 } 6468 if (emit->key.fs.write_color0_to_n_cbufs > 1 || 6469 emit->key.fs.white_fragments) { 6470 emit_broadcast_color_instructions(emit, fs_color_tmp_index); 6471 } 6472 } 6473 6474 return TRUE; 6475 } 6476 6477 6478 /** 6479 * Translate the TGSI tokens into VGPU10 tokens. 6480 */ 6481 static boolean 6482 emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit, 6483 const struct tgsi_token *tokens) 6484 { 6485 struct tgsi_parse_context parse; 6486 boolean ret = TRUE; 6487 boolean pre_helpers_emitted = FALSE; 6488 unsigned inst_number = 0; 6489 6490 tgsi_parse_init(&parse, tokens); 6491 6492 while (!tgsi_parse_end_of_tokens(&parse)) { 6493 tgsi_parse_token(&parse); 6494 6495 switch (parse.FullToken.Token.Type) { 6496 case TGSI_TOKEN_TYPE_IMMEDIATE: 6497 ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate); 6498 if (!ret) 6499 goto done; 6500 break; 6501 6502 case TGSI_TOKEN_TYPE_DECLARATION: 6503 ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration); 6504 if (!ret) 6505 goto done; 6506 break; 6507 6508 case TGSI_TOKEN_TYPE_INSTRUCTION: 6509 if (!pre_helpers_emitted) { 6510 ret = emit_pre_helpers(emit); 6511 if (!ret) 6512 goto done; 6513 pre_helpers_emitted = TRUE; 6514 } 6515 ret = emit_vgpu10_instruction(emit, inst_number++, 6516 &parse.FullToken.FullInstruction); 6517 if (!ret) 6518 goto done; 6519 break; 6520 6521 case TGSI_TOKEN_TYPE_PROPERTY: 6522 ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty); 6523 if (!ret) 6524 goto done; 6525 break; 6526 6527 default: 6528 break; 6529 } 6530 } 6531 6532 done: 6533 tgsi_parse_free(&parse); 6534 return ret; 6535 } 6536 6537 6538 /** 6539 * Emit the first VGPU10 shader tokens. 6540 */ 6541 static boolean 6542 emit_vgpu10_header(struct svga_shader_emitter_v10 *emit) 6543 { 6544 VGPU10ProgramToken ptoken; 6545 6546 /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */ 6547 ptoken.majorVersion = 4; 6548 ptoken.minorVersion = 0; 6549 ptoken.programType = translate_shader_type(emit->unit); 6550 if (!emit_dword(emit, ptoken.value)) 6551 return FALSE; 6552 6553 /* Second token: total length of shader, in tokens. We can't fill this 6554 * in until we're all done. Emit zero for now. 
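 * The zero emitted here is patched afterwards by emit_vgpu10_tail(), which
 * overwrites this token with the final value of emit_get_num_tokens().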
*/
   return emit_dword(emit, 0);
}


static boolean
emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken *tokens;

   /* Replace the second token with the total shader length */
   tokens = (VGPU10ProgramToken *) emit->buf;
   tokens[1].value = emit_get_num_tokens(emit);

   return TRUE;
}


/**
 * Modify the FS to read the BCOLORs and use the FACE register
 * to choose between the front/back colors.
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }
   tokens = tgsi_add_two_side(tokens);
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}


/**
 * Modify the FS to do polygon stipple.
 */
static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_token *tokens)
{
   const struct tgsi_token *new_tokens;
   unsigned unit;

   if (0) {
      debug_printf("Before pstipple ------------------\n");
      tgsi_dump(tokens, 0);
   }

   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
                                                     TGSI_FILE_INPUT);

   emit->fs.pstipple_sampler_unit = unit;

   /* Setup texture state for stipple */
   emit->sampler_target[unit] = TGSI_TEXTURE_2D;
   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;

   if (0) {
      debug_printf("After pstipple ------------------\n");
      tgsi_dump(new_tokens, 0);
   }

   return new_tokens;
}

/**
 * Modify the FS to support anti-aliased points.
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }
   tokens = tgsi_add_aa_point(tokens, aa_coord_index);
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}

/**
 * This is the main entrypoint for the TGSI -> VGPU10 translator.
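 *
 * In outline: optionally transform the incoming TGSI tokens first
 * (two-sided lighting, polygon stipple, anti-aliased points), scan and
 * link the shader info, emit the VGPU10 header, instructions and tail,
 * and finally wrap the resulting token buffer in a svga_shader_variant.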
6649 */ 6650 struct svga_shader_variant * 6651 svga_tgsi_vgpu10_translate(struct svga_context *svga, 6652 const struct svga_shader *shader, 6653 const struct svga_compile_key *key, 6654 unsigned unit) 6655 { 6656 struct svga_shader_variant *variant = NULL; 6657 struct svga_shader_emitter_v10 *emit; 6658 const struct tgsi_token *tokens = shader->tokens; 6659 struct svga_vertex_shader *vs = svga->curr.vs; 6660 struct svga_geometry_shader *gs = svga->curr.gs; 6661 6662 assert(unit == PIPE_SHADER_VERTEX || 6663 unit == PIPE_SHADER_GEOMETRY || 6664 unit == PIPE_SHADER_FRAGMENT); 6665 6666 /* These two flags cannot be used together */ 6667 assert(key->vs.need_prescale + key->vs.undo_viewport <= 1); 6668 6669 SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE); 6670 /* 6671 * Setup the code emitter 6672 */ 6673 emit = alloc_emitter(); 6674 if (!emit) 6675 goto done; 6676 6677 emit->unit = unit; 6678 emit->key = *key; 6679 6680 emit->vposition.need_prescale = (emit->key.vs.need_prescale || 6681 emit->key.gs.need_prescale); 6682 emit->vposition.tmp_index = INVALID_INDEX; 6683 emit->vposition.so_index = INVALID_INDEX; 6684 emit->vposition.out_index = INVALID_INDEX; 6685 6686 emit->fs.color_tmp_index = INVALID_INDEX; 6687 emit->fs.face_input_index = INVALID_INDEX; 6688 emit->fs.fragcoord_input_index = INVALID_INDEX; 6689 6690 emit->gs.prim_id_index = INVALID_INDEX; 6691 6692 emit->clip_dist_out_index = INVALID_INDEX; 6693 emit->clip_dist_tmp_index = INVALID_INDEX; 6694 emit->clip_dist_so_index = INVALID_INDEX; 6695 emit->clip_vertex_out_index = INVALID_INDEX; 6696 6697 if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) { 6698 emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS; 6699 } 6700 6701 if (unit == PIPE_SHADER_FRAGMENT) { 6702 if (key->fs.light_twoside) { 6703 tokens = transform_fs_twoside(tokens); 6704 } 6705 if (key->fs.pstipple) { 6706 const struct tgsi_token *new_tokens = 6707 transform_fs_pstipple(emit, tokens); 6708 if (tokens != shader->tokens) { 6709 /* free the two-sided shader tokens */ 6710 tgsi_free_tokens(tokens); 6711 } 6712 tokens = new_tokens; 6713 } 6714 if (key->fs.aa_point) { 6715 tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index); 6716 } 6717 } 6718 6719 if (SVGA_DEBUG & DEBUG_TGSI) { 6720 debug_printf("#####################################\n"); 6721 debug_printf("### TGSI Shader %u\n", shader->id); 6722 tgsi_dump(tokens, 0); 6723 } 6724 6725 /** 6726 * Rescan the header if the token string is different from the one 6727 * included in the shader; otherwise, the header info is already up-to-date 6728 */ 6729 if (tokens != shader->tokens) { 6730 tgsi_scan_shader(tokens, &emit->info); 6731 } else { 6732 emit->info = shader->info; 6733 } 6734 6735 emit->num_outputs = emit->info.num_outputs; 6736 6737 if (unit == PIPE_SHADER_FRAGMENT) { 6738 /* Compute FS input remapping to match the output from VS/GS */ 6739 if (gs) { 6740 svga_link_shaders(&gs->base.info, &emit->info, &emit->linkage); 6741 } else { 6742 assert(vs); 6743 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6744 } 6745 } else if (unit == PIPE_SHADER_GEOMETRY) { 6746 assert(vs); 6747 svga_link_shaders(&vs->base.info, &emit->info, &emit->linkage); 6748 } 6749 6750 determine_clipping_mode(emit); 6751 6752 if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX) { 6753 if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) { 6754 /* if there is stream output declarations associated 6755 * with this shader or the shader writes to ClipDistance 6756 * 
then reserve extra registers for the non-adjusted vertex position 6757 * and the ClipDistance shadow copy 6758 */ 6759 emit->vposition.so_index = emit->num_outputs++; 6760 6761 if (emit->clip_mode == CLIP_DISTANCE) { 6762 emit->clip_dist_so_index = emit->num_outputs++; 6763 if (emit->info.num_written_clipdistance > 4) 6764 emit->num_outputs++; 6765 } 6766 } 6767 } 6768 6769 /* 6770 * Do actual shader translation. 6771 */ 6772 if (!emit_vgpu10_header(emit)) { 6773 debug_printf("svga: emit VGPU10 header failed\n"); 6774 goto cleanup; 6775 } 6776 6777 if (!emit_vgpu10_instructions(emit, tokens)) { 6778 debug_printf("svga: emit VGPU10 instructions failed\n"); 6779 goto cleanup; 6780 } 6781 6782 if (!emit_vgpu10_tail(emit)) { 6783 debug_printf("svga: emit VGPU10 tail failed\n"); 6784 goto cleanup; 6785 } 6786 6787 if (emit->register_overflow) { 6788 goto cleanup; 6789 } 6790 6791 /* 6792 * Create, initialize the 'variant' object. 6793 */ 6794 variant = svga_new_shader_variant(svga); 6795 if (!variant) 6796 goto cleanup; 6797 6798 variant->shader = shader; 6799 variant->nr_tokens = emit_get_num_tokens(emit); 6800 variant->tokens = (const unsigned *)emit->buf; 6801 emit->buf = NULL; /* buffer is no longer owed by emitter context */ 6802 memcpy(&variant->key, key, sizeof(*key)); 6803 variant->id = UTIL_BITMASK_INVALID_INDEX; 6804 6805 /* The extra constant starting offset starts with the number of 6806 * shader constants declared in the shader. 6807 */ 6808 variant->extra_const_start = emit->num_shader_consts[0]; 6809 if (key->gs.wide_point) { 6810 /** 6811 * The extra constant added in the transformed shader 6812 * for inverse viewport scale is to be supplied by the driver. 6813 * So the extra constant starting offset needs to be reduced by 1. 6814 */ 6815 assert(variant->extra_const_start > 0); 6816 variant->extra_const_start--; 6817 } 6818 6819 variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit; 6820 6821 /* If there was exactly one write to a fragment shader output register 6822 * and it came from a constant buffer, we know all fragments will have 6823 * the same color (except for blending). 6824 */ 6825 variant->constant_color_output = 6826 emit->constant_color_output && emit->num_output_writes == 1; 6827 6828 /** keep track in the variant if flat interpolation is used 6829 * for any of the varyings. 6830 */ 6831 variant->uses_flat_interp = emit->uses_flat_interp; 6832 6833 if (tokens != shader->tokens) { 6834 tgsi_free_tokens(tokens); 6835 } 6836 6837 cleanup: 6838 free_emitter(emit); 6839 6840 done: 6841 SVGA_STATS_TIME_POP(svga_sws(svga)); 6842 return variant; 6843 } 6844