1 /************************************************************************** 2 * 3 * Copyright 2007-2008 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /* 29 * \author 30 * Michal Krol, 31 * Keith Whitwell 32 */ 33 34 #include "pipe/p_compiler.h" 35 #include "pipe/p_context.h" 36 #include "pipe/p_screen.h" 37 #include "pipe/p_shader_tokens.h" 38 #include "pipe/p_state.h" 39 #include "tgsi/tgsi_ureg.h" 40 #include "st_mesa_to_tgsi.h" 41 #include "st_context.h" 42 #include "program/prog_instruction.h" 43 #include "program/prog_parameter.h" 44 #include "util/u_debug.h" 45 #include "util/u_math.h" 46 #include "util/u_memory.h" 47 #include "st_glsl_to_tgsi.h" /* for _mesa_sysval_to_semantic */ 48 49 50 #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ 51 (1 << PROGRAM_CONSTANT) | \ 52 (1 << PROGRAM_UNIFORM)) 53 54 /** 55 * Intermediate state used during shader translation. 56 */ 57 struct st_translate { 58 struct ureg_program *ureg; 59 60 struct ureg_dst temps[MAX_PROGRAM_TEMPS]; 61 struct ureg_src *constants; 62 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 63 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 64 struct ureg_dst address[1]; 65 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 66 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 67 68 const GLuint *inputMapping; 69 const GLuint *outputMapping; 70 71 unsigned procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ 72 }; 73 74 75 /** 76 * Map a Mesa dst register to a TGSI ureg_dst register. 77 */ 78 static struct ureg_dst 79 dst_register( struct st_translate *t, 80 gl_register_file file, 81 GLuint index ) 82 { 83 switch( file ) { 84 case PROGRAM_UNDEFINED: 85 return ureg_dst_undef(); 86 87 case PROGRAM_TEMPORARY: 88 if (ureg_dst_is_undef(t->temps[index])) 89 t->temps[index] = ureg_DECL_temporary( t->ureg ); 90 91 return t->temps[index]; 92 93 case PROGRAM_OUTPUT: 94 if (t->procType == PIPE_SHADER_VERTEX) 95 assert(index < VARYING_SLOT_MAX); 96 else if (t->procType == PIPE_SHADER_FRAGMENT) 97 assert(index < FRAG_RESULT_MAX); 98 else 99 assert(index < VARYING_SLOT_MAX); 100 101 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); 102 103 return t->outputs[t->outputMapping[index]]; 104 105 case PROGRAM_ADDRESS: 106 return t->address[index]; 107 108 default: 109 debug_assert( 0 ); 110 return ureg_dst_undef(); 111 } 112 } 113 114 115 /** 116 * Map a Mesa src register to a TGSI ureg_src register. 117 */ 118 static struct ureg_src 119 src_register( struct st_translate *t, 120 gl_register_file file, 121 GLint index ) 122 { 123 switch( file ) { 124 case PROGRAM_UNDEFINED: 125 return ureg_src_undef(); 126 127 case PROGRAM_TEMPORARY: 128 assert(index >= 0); 129 assert(index < ARRAY_SIZE(t->temps)); 130 if (ureg_dst_is_undef(t->temps[index])) 131 t->temps[index] = ureg_DECL_temporary( t->ureg ); 132 return ureg_src(t->temps[index]); 133 134 case PROGRAM_UNIFORM: 135 assert(index >= 0); 136 return t->constants[index]; 137 case PROGRAM_STATE_VAR: 138 case PROGRAM_CONSTANT: /* ie, immediate */ 139 if (index < 0) 140 return ureg_DECL_constant( t->ureg, 0 ); 141 else 142 return t->constants[index]; 143 144 case PROGRAM_INPUT: 145 assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); 146 return t->inputs[t->inputMapping[index]]; 147 148 case PROGRAM_OUTPUT: 149 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); 150 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */ 151 152 case PROGRAM_ADDRESS: 153 return ureg_src(t->address[index]); 154 155 case PROGRAM_SYSTEM_VALUE: 156 assert(index < ARRAY_SIZE(t->systemValues)); 157 return t->systemValues[index]; 158 159 default: 160 debug_assert( 0 ); 161 return ureg_src_undef(); 162 } 163 } 164 165 166 /** 167 * Map mesa texture target to TGSI texture target. 168 */ 169 unsigned 170 st_translate_texture_target(GLuint textarget, GLboolean shadow) 171 { 172 if (shadow) { 173 switch (textarget) { 174 case TEXTURE_1D_INDEX: 175 return TGSI_TEXTURE_SHADOW1D; 176 case TEXTURE_2D_INDEX: 177 return TGSI_TEXTURE_SHADOW2D; 178 case TEXTURE_RECT_INDEX: 179 return TGSI_TEXTURE_SHADOWRECT; 180 case TEXTURE_1D_ARRAY_INDEX: 181 return TGSI_TEXTURE_SHADOW1D_ARRAY; 182 case TEXTURE_2D_ARRAY_INDEX: 183 return TGSI_TEXTURE_SHADOW2D_ARRAY; 184 case TEXTURE_CUBE_INDEX: 185 return TGSI_TEXTURE_SHADOWCUBE; 186 case TEXTURE_CUBE_ARRAY_INDEX: 187 return TGSI_TEXTURE_SHADOWCUBE_ARRAY; 188 default: 189 break; 190 } 191 } 192 193 switch (textarget) { 194 case TEXTURE_2D_MULTISAMPLE_INDEX: 195 return TGSI_TEXTURE_2D_MSAA; 196 case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: 197 return TGSI_TEXTURE_2D_ARRAY_MSAA; 198 case TEXTURE_BUFFER_INDEX: 199 return TGSI_TEXTURE_BUFFER; 200 case TEXTURE_1D_INDEX: 201 return TGSI_TEXTURE_1D; 202 case TEXTURE_2D_INDEX: 203 return TGSI_TEXTURE_2D; 204 case TEXTURE_3D_INDEX: 205 return TGSI_TEXTURE_3D; 206 case TEXTURE_CUBE_INDEX: 207 return TGSI_TEXTURE_CUBE; 208 case TEXTURE_CUBE_ARRAY_INDEX: 209 return TGSI_TEXTURE_CUBE_ARRAY; 210 case TEXTURE_RECT_INDEX: 211 return TGSI_TEXTURE_RECT; 212 case TEXTURE_1D_ARRAY_INDEX: 213 return TGSI_TEXTURE_1D_ARRAY; 214 case TEXTURE_2D_ARRAY_INDEX: 215 return TGSI_TEXTURE_2D_ARRAY; 216 case TEXTURE_EXTERNAL_INDEX: 217 return TGSI_TEXTURE_2D; 218 default: 219 debug_assert(!"unexpected texture target index"); 220 return TGSI_TEXTURE_1D; 221 } 222 } 223 224 225 /** 226 * Translate a (1 << TEXTURE_x_INDEX) bit into a TGSI_TEXTURE_x enum. 227 */ 228 static unsigned 229 translate_texture_index(GLbitfield texBit, bool shadow) 230 { 231 int index = ffs(texBit); 232 assert(index > 0); 233 assert(index - 1 < NUM_TEXTURE_TARGETS); 234 return st_translate_texture_target(index - 1, shadow); 235 } 236 237 238 /** 239 * Create a TGSI ureg_dst register from a Mesa dest register. 240 */ 241 static struct ureg_dst 242 translate_dst( struct st_translate *t, 243 const struct prog_dst_register *DstReg, 244 boolean saturate) 245 { 246 struct ureg_dst dst = dst_register( t, 247 DstReg->File, 248 DstReg->Index ); 249 250 dst = ureg_writemask( dst, 251 DstReg->WriteMask ); 252 253 if (saturate) 254 dst = ureg_saturate( dst ); 255 256 if (DstReg->RelAddr) 257 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) ); 258 259 return dst; 260 } 261 262 263 /** 264 * Create a TGSI ureg_src register from a Mesa src register. 265 */ 266 static struct ureg_src 267 translate_src( struct st_translate *t, 268 const struct prog_src_register *SrcReg ) 269 { 270 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 271 272 src = ureg_swizzle( src, 273 GET_SWZ( SrcReg->Swizzle, 0 ) & 0x3, 274 GET_SWZ( SrcReg->Swizzle, 1 ) & 0x3, 275 GET_SWZ( SrcReg->Swizzle, 2 ) & 0x3, 276 GET_SWZ( SrcReg->Swizzle, 3 ) & 0x3); 277 278 if (SrcReg->Negate == NEGATE_XYZW) 279 src = ureg_negate(src); 280 281 if (SrcReg->RelAddr) { 282 src = ureg_src_indirect( src, ureg_src(t->address[0])); 283 if (SrcReg->File != PROGRAM_INPUT && 284 SrcReg->File != PROGRAM_OUTPUT) { 285 /* If SrcReg->Index was negative, it was set to zero in 286 * src_register(). Reassign it now. But don't do this 287 * for input/output regs since they get remapped while 288 * const buffers don't. 289 */ 290 src.Index = SrcReg->Index; 291 } 292 } 293 294 return src; 295 } 296 297 298 static struct ureg_src swizzle_4v( struct ureg_src src, 299 const unsigned *swz ) 300 { 301 return ureg_swizzle( src, swz[0], swz[1], swz[2], swz[3] ); 302 } 303 304 305 /** 306 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. EG: 307 * 308 * SWZ dst, src.x-y10 309 * 310 * becomes: 311 * 312 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} 313 */ 314 static void emit_swz( struct st_translate *t, 315 struct ureg_dst dst, 316 const struct prog_src_register *SrcReg ) 317 { 318 struct ureg_program *ureg = t->ureg; 319 struct ureg_src src = src_register( t, SrcReg->File, SrcReg->Index ); 320 321 unsigned negate_mask = SrcReg->Negate; 322 323 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | 324 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | 325 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | 326 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); 327 328 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | 329 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | 330 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | 331 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); 332 333 unsigned negative_one_mask = one_mask & negate_mask; 334 unsigned positive_one_mask = one_mask & ~negate_mask; 335 336 struct ureg_src imm; 337 unsigned i; 338 unsigned mul_swizzle[4] = {0,0,0,0}; 339 unsigned add_swizzle[4] = {0,0,0,0}; 340 unsigned src_swizzle[4] = {0,0,0,0}; 341 boolean need_add = FALSE; 342 boolean need_mul = FALSE; 343 344 if (dst.WriteMask == 0) 345 return; 346 347 /* Is this just a MOV? 348 */ 349 if (zero_mask == 0 && 350 one_mask == 0 && 351 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) 352 { 353 ureg_MOV( ureg, dst, translate_src( t, SrcReg )); 354 return; 355 } 356 357 #define IMM_ZERO 0 358 #define IMM_ONE 1 359 #define IMM_NEG_ONE 2 360 361 imm = ureg_imm3f( ureg, 0, 1, -1 ); 362 363 for (i = 0; i < 4; i++) { 364 unsigned bit = 1 << i; 365 366 if (dst.WriteMask & bit) { 367 if (positive_one_mask & bit) { 368 mul_swizzle[i] = IMM_ZERO; 369 add_swizzle[i] = IMM_ONE; 370 need_add = TRUE; 371 } 372 else if (negative_one_mask & bit) { 373 mul_swizzle[i] = IMM_ZERO; 374 add_swizzle[i] = IMM_NEG_ONE; 375 need_add = TRUE; 376 } 377 else if (zero_mask & bit) { 378 mul_swizzle[i] = IMM_ZERO; 379 add_swizzle[i] = IMM_ZERO; 380 need_add = TRUE; 381 } 382 else { 383 add_swizzle[i] = IMM_ZERO; 384 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); 385 need_mul = TRUE; 386 if (negate_mask & bit) { 387 mul_swizzle[i] = IMM_NEG_ONE; 388 } 389 else { 390 mul_swizzle[i] = IMM_ONE; 391 } 392 } 393 } 394 } 395 396 if (need_mul && need_add) { 397 ureg_MAD( ureg, 398 dst, 399 swizzle_4v( src, src_swizzle ), 400 swizzle_4v( imm, mul_swizzle ), 401 swizzle_4v( imm, add_swizzle ) ); 402 } 403 else if (need_mul) { 404 ureg_MUL( ureg, 405 dst, 406 swizzle_4v( src, src_swizzle ), 407 swizzle_4v( imm, mul_swizzle ) ); 408 } 409 else if (need_add) { 410 ureg_MOV( ureg, 411 dst, 412 swizzle_4v( imm, add_swizzle ) ); 413 } 414 else { 415 debug_assert(0); 416 } 417 418 #undef IMM_ZERO 419 #undef IMM_ONE 420 #undef IMM_NEG_ONE 421 } 422 423 424 static unsigned 425 translate_opcode( unsigned op ) 426 { 427 switch( op ) { 428 case OPCODE_ARL: 429 return TGSI_OPCODE_ARL; 430 case OPCODE_ADD: 431 return TGSI_OPCODE_ADD; 432 case OPCODE_CMP: 433 return TGSI_OPCODE_CMP; 434 case OPCODE_COS: 435 return TGSI_OPCODE_COS; 436 case OPCODE_DP3: 437 return TGSI_OPCODE_DP3; 438 case OPCODE_DP4: 439 return TGSI_OPCODE_DP4; 440 case OPCODE_DPH: 441 return TGSI_OPCODE_DPH; 442 case OPCODE_DST: 443 return TGSI_OPCODE_DST; 444 case OPCODE_EX2: 445 return TGSI_OPCODE_EX2; 446 case OPCODE_EXP: 447 return TGSI_OPCODE_EXP; 448 case OPCODE_FLR: 449 return TGSI_OPCODE_FLR; 450 case OPCODE_FRC: 451 return TGSI_OPCODE_FRC; 452 case OPCODE_KIL: 453 return TGSI_OPCODE_KILL_IF; 454 case OPCODE_LG2: 455 return TGSI_OPCODE_LG2; 456 case OPCODE_LOG: 457 return TGSI_OPCODE_LOG; 458 case OPCODE_LIT: 459 return TGSI_OPCODE_LIT; 460 case OPCODE_LRP: 461 return TGSI_OPCODE_LRP; 462 case OPCODE_MAD: 463 return TGSI_OPCODE_MAD; 464 case OPCODE_MAX: 465 return TGSI_OPCODE_MAX; 466 case OPCODE_MIN: 467 return TGSI_OPCODE_MIN; 468 case OPCODE_MOV: 469 return TGSI_OPCODE_MOV; 470 case OPCODE_MUL: 471 return TGSI_OPCODE_MUL; 472 case OPCODE_POW: 473 return TGSI_OPCODE_POW; 474 case OPCODE_RCP: 475 return TGSI_OPCODE_RCP; 476 case OPCODE_SCS: 477 return TGSI_OPCODE_SCS; 478 case OPCODE_SGE: 479 return TGSI_OPCODE_SGE; 480 case OPCODE_SIN: 481 return TGSI_OPCODE_SIN; 482 case OPCODE_SLT: 483 return TGSI_OPCODE_SLT; 484 case OPCODE_TEX: 485 return TGSI_OPCODE_TEX; 486 case OPCODE_TXB: 487 return TGSI_OPCODE_TXB; 488 case OPCODE_TXP: 489 return TGSI_OPCODE_TXP; 490 case OPCODE_XPD: 491 return TGSI_OPCODE_XPD; 492 case OPCODE_END: 493 return TGSI_OPCODE_END; 494 default: 495 debug_assert( 0 ); 496 return TGSI_OPCODE_NOP; 497 } 498 } 499 500 501 static void 502 compile_instruction( 503 struct gl_context *ctx, 504 struct st_translate *t, 505 const struct prog_instruction *inst) 506 { 507 struct ureg_program *ureg = t->ureg; 508 GLuint i; 509 struct ureg_dst dst[1] = { { 0 } }; 510 struct ureg_src src[4]; 511 unsigned num_dst; 512 unsigned num_src; 513 514 num_dst = _mesa_num_inst_dst_regs( inst->Opcode ); 515 num_src = _mesa_num_inst_src_regs( inst->Opcode ); 516 517 if (num_dst) 518 dst[0] = translate_dst( t, 519 &inst->DstReg, 520 inst->Saturate); 521 522 for (i = 0; i < num_src; i++) 523 src[i] = translate_src( t, &inst->SrcReg[i] ); 524 525 switch( inst->Opcode ) { 526 case OPCODE_SWZ: 527 emit_swz( t, dst[0], &inst->SrcReg[0] ); 528 return; 529 530 case OPCODE_TEX: 531 case OPCODE_TXB: 532 case OPCODE_TXP: 533 src[num_src++] = t->samplers[inst->TexSrcUnit]; 534 ureg_tex_insn( ureg, 535 translate_opcode( inst->Opcode ), 536 dst, num_dst, 537 st_translate_texture_target( inst->TexSrcTarget, 538 inst->TexShadow ), 539 NULL, 0, 540 src, num_src ); 541 return; 542 543 case OPCODE_SCS: 544 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY ); 545 ureg_insn( ureg, 546 translate_opcode( inst->Opcode ), 547 dst, num_dst, 548 src, num_src ); 549 break; 550 551 case OPCODE_XPD: 552 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ ); 553 ureg_insn( ureg, 554 translate_opcode( inst->Opcode ), 555 dst, num_dst, 556 src, num_src ); 557 break; 558 559 case OPCODE_RSQ: 560 ureg_RSQ( ureg, dst[0], ureg_abs(src[0]) ); 561 break; 562 563 case OPCODE_ABS: 564 ureg_MOV(ureg, dst[0], ureg_abs(src[0])); 565 break; 566 567 case OPCODE_SUB: 568 ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1])); 569 break; 570 571 default: 572 ureg_insn( ureg, 573 translate_opcode( inst->Opcode ), 574 dst, num_dst, 575 src, num_src ); 576 break; 577 } 578 } 579 580 581 /** 582 * Emit the TGSI instructions for inverting and adjusting WPOS. 583 * This code is unavoidable because it also depends on whether 584 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 585 */ 586 static void 587 emit_wpos_adjustment(struct gl_context *ctx, 588 struct st_translate *t, 589 const struct gl_program *program, 590 boolean invert, 591 GLfloat adjX, GLfloat adjY[2]) 592 { 593 struct ureg_program *ureg = t->ureg; 594 595 /* Fragment program uses fragment position input. 596 * Need to replace instances of INPUT[WPOS] with temp T 597 * where T = INPUT[WPOS] by y is inverted. 598 */ 599 static const gl_state_index wposTransformState[STATE_LENGTH] 600 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 }; 601 602 /* XXX: note we are modifying the incoming shader here! Need to 603 * do this before emitting the constant decls below, or this 604 * will be missed: 605 */ 606 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 607 wposTransformState); 608 609 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst ); 610 struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg ); 611 struct ureg_src *wpos = 612 ctx->Const.GLSLFragCoordIsSysVal ? 613 &t->systemValues[SYSTEM_VALUE_FRAG_COORD] : 614 &t->inputs[t->inputMapping[VARYING_SLOT_POS]]; 615 struct ureg_src wpos_input = *wpos; 616 617 /* First, apply the coordinate shift: */ 618 if (adjX || adjY[0] || adjY[1]) { 619 if (adjY[0] != adjY[1]) { 620 /* Adjust the y coordinate by adjY[1] or adjY[0] respectively 621 * depending on whether inversion is actually going to be applied 622 * or not, which is determined by testing against the inversion 623 * state variable used below, which will be either +1 or -1. 624 */ 625 struct ureg_dst adj_temp = ureg_DECL_temporary(ureg); 626 627 ureg_CMP(ureg, adj_temp, 628 ureg_scalar(wpostrans, invert ? 2 : 0), 629 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), 630 ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); 631 ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); 632 } else { 633 ureg_ADD(ureg, wpos_temp, wpos_input, 634 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); 635 } 636 wpos_input = ureg_src(wpos_temp); 637 } else { 638 /* MOV wpos_temp, input[wpos] 639 */ 640 ureg_MOV( ureg, wpos_temp, wpos_input ); 641 } 642 643 /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be 644 * inversion/identity, or the other way around if we're drawing to an FBO. 645 */ 646 if (invert) { 647 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 648 */ 649 ureg_MAD( ureg, 650 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 651 wpos_input, 652 ureg_scalar(wpostrans, 0), 653 ureg_scalar(wpostrans, 1)); 654 } else { 655 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 656 */ 657 ureg_MAD( ureg, 658 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ), 659 wpos_input, 660 ureg_scalar(wpostrans, 2), 661 ureg_scalar(wpostrans, 3)); 662 } 663 664 /* Use wpos_temp as position input from here on: 665 */ 666 *wpos = ureg_src(wpos_temp); 667 } 668 669 670 /** 671 * Emit fragment position/coordinate code. 672 */ 673 static void 674 emit_wpos(struct st_context *st, 675 struct st_translate *t, 676 const struct gl_program *program, 677 struct ureg_program *ureg) 678 { 679 struct pipe_screen *pscreen = st->pipe->screen; 680 GLfloat adjX = 0.0f; 681 GLfloat adjY[2] = { 0.0f, 0.0f }; 682 boolean invert = FALSE; 683 684 /* Query the pixel center conventions supported by the pipe driver and set 685 * adjX, adjY to help out if it cannot handle the requested one internally. 686 * 687 * The bias of the y-coordinate depends on whether y-inversion takes place 688 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are 689 * drawing to an FBO (causes additional inversion), and whether the pipe 690 * driver origin and the requested origin differ (the latter condition is 691 * stored in the 'invert' variable). 692 * 693 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): 694 * 695 * center shift only: 696 * i -> h: +0.5 697 * h -> i: -0.5 698 * 699 * inversion only: 700 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 701 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 702 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 703 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 704 * 705 * inversion and center shift: 706 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 707 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 708 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 709 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 710 */ 711 if (program->OriginUpperLeft) { 712 /* Fragment shader wants origin in upper-left */ 713 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 714 /* the driver supports upper-left origin */ 715 } 716 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 717 /* the driver supports lower-left origin, need to invert Y */ 718 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, 719 TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 720 invert = TRUE; 721 } 722 else 723 assert(0); 724 } 725 else { 726 /* Fragment shader wants origin in lower-left */ 727 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 728 /* the driver supports lower-left origin */ 729 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, 730 TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 731 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 732 /* the driver supports upper-left origin, need to invert Y */ 733 invert = TRUE; 734 else 735 assert(0); 736 } 737 738 if (program->PixelCenterInteger) { 739 /* Fragment shader wants pixel center integer */ 740 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 741 /* the driver supports pixel center integer */ 742 adjY[1] = 1.0f; 743 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 744 TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 745 } 746 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 747 /* the driver supports pixel center half integer, need to bias X,Y */ 748 adjX = -0.5f; 749 adjY[0] = -0.5f; 750 adjY[1] = 0.5f; 751 } 752 else 753 assert(0); 754 } 755 else { 756 /* Fragment shader wants pixel center half integer */ 757 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 758 /* the driver supports pixel center half integer */ 759 } 760 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 761 /* the driver supports pixel center integer, need to bias X,Y */ 762 adjX = adjY[0] = adjY[1] = 0.5f; 763 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 764 TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 765 } 766 else 767 assert(0); 768 } 769 770 /* we invert after adjustment so that we avoid the MOV to temporary, 771 * and reuse the adjustment ADD instead */ 772 emit_wpos_adjustment(st->ctx, t, program, invert, adjX, adjY); 773 } 774 775 776 /** 777 * Translate Mesa program to TGSI format. 778 * \param program the program to translate 779 * \param numInputs number of input registers used 780 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 781 * input indexes 782 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 783 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 784 * each input 785 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 786 * \param numOutputs number of output registers used 787 * \param outputMapping maps Mesa fragment program outputs to TGSI 788 * generic outputs 789 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 790 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 791 * each output 792 * 793 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 794 */ 795 enum pipe_error 796 st_translate_mesa_program( 797 struct gl_context *ctx, 798 uint procType, 799 struct ureg_program *ureg, 800 const struct gl_program *program, 801 GLuint numInputs, 802 const GLuint inputMapping[], 803 const ubyte inputSemanticName[], 804 const ubyte inputSemanticIndex[], 805 const GLuint interpMode[], 806 GLuint numOutputs, 807 const GLuint outputMapping[], 808 const ubyte outputSemanticName[], 809 const ubyte outputSemanticIndex[]) 810 { 811 struct st_translate translate, *t; 812 unsigned i; 813 enum pipe_error ret = PIPE_OK; 814 815 assert(numInputs <= ARRAY_SIZE(t->inputs)); 816 assert(numOutputs <= ARRAY_SIZE(t->outputs)); 817 818 t = &translate; 819 memset(t, 0, sizeof *t); 820 821 t->procType = procType; 822 t->inputMapping = inputMapping; 823 t->outputMapping = outputMapping; 824 t->ureg = ureg; 825 826 /*_mesa_print_program(program);*/ 827 828 /* 829 * Declare input attributes. 830 */ 831 if (procType == PIPE_SHADER_FRAGMENT) { 832 for (i = 0; i < numInputs; i++) { 833 t->inputs[i] = ureg_DECL_fs_input(ureg, 834 inputSemanticName[i], 835 inputSemanticIndex[i], 836 interpMode[i]); 837 } 838 839 if (program->info.inputs_read & VARYING_BIT_POS) { 840 /* Must do this after setting up t->inputs, and before 841 * emitting constant references, below: 842 */ 843 emit_wpos(st_context(ctx), t, program, ureg); 844 } 845 846 /* 847 * Declare output attributes. 848 */ 849 for (i = 0; i < numOutputs; i++) { 850 switch (outputSemanticName[i]) { 851 case TGSI_SEMANTIC_POSITION: 852 t->outputs[i] = ureg_DECL_output( ureg, 853 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 854 outputSemanticIndex[i] ); 855 856 t->outputs[i] = ureg_writemask( t->outputs[i], 857 TGSI_WRITEMASK_Z ); 858 break; 859 case TGSI_SEMANTIC_STENCIL: 860 t->outputs[i] = ureg_DECL_output( ureg, 861 TGSI_SEMANTIC_STENCIL, /* Stencil */ 862 outputSemanticIndex[i] ); 863 t->outputs[i] = ureg_writemask( t->outputs[i], 864 TGSI_WRITEMASK_Y ); 865 break; 866 case TGSI_SEMANTIC_COLOR: 867 t->outputs[i] = ureg_DECL_output( ureg, 868 TGSI_SEMANTIC_COLOR, 869 outputSemanticIndex[i] ); 870 break; 871 default: 872 debug_assert(0); 873 return 0; 874 } 875 } 876 } 877 else if (procType == PIPE_SHADER_GEOMETRY) { 878 for (i = 0; i < numInputs; i++) { 879 t->inputs[i] = ureg_DECL_input(ureg, 880 inputSemanticName[i], 881 inputSemanticIndex[i], 0, 1); 882 } 883 884 for (i = 0; i < numOutputs; i++) { 885 t->outputs[i] = ureg_DECL_output( ureg, 886 outputSemanticName[i], 887 outputSemanticIndex[i] ); 888 } 889 } 890 else { 891 assert(procType == PIPE_SHADER_VERTEX); 892 893 for (i = 0; i < numInputs; i++) { 894 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 895 } 896 897 for (i = 0; i < numOutputs; i++) { 898 t->outputs[i] = ureg_DECL_output( ureg, 899 outputSemanticName[i], 900 outputSemanticIndex[i] ); 901 if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) { 902 /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */ 903 ureg_MOV(ureg, 904 ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW), 905 ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 906 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); 907 } 908 } 909 } 910 911 /* Declare address register. 912 */ 913 if (program->arb.NumAddressRegs > 0) { 914 debug_assert( program->arb.NumAddressRegs == 1 ); 915 t->address[0] = ureg_DECL_address( ureg ); 916 } 917 918 /* Declare misc input registers 919 */ 920 { 921 GLbitfield sysInputs = program->info.system_values_read; 922 923 for (i = 0; sysInputs; i++) { 924 if (sysInputs & (1 << i)) { 925 unsigned semName = _mesa_sysval_to_semantic(i); 926 927 t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); 928 929 if (semName == TGSI_SEMANTIC_INSTANCEID || 930 semName == TGSI_SEMANTIC_VERTEXID) { 931 /* From Gallium perspective, these system values are always 932 * integer, and require native integer support. However, if 933 * native integer is supported on the vertex stage but not the 934 * pixel stage (e.g, i915g + draw), Mesa will generate IR that 935 * assumes these system values are floats. To resolve the 936 * inconsistency, we insert a U2F. 937 */ 938 struct st_context *st = st_context(ctx); 939 struct pipe_screen *pscreen = st->pipe->screen; 940 assert(procType == PIPE_SHADER_VERTEX); 941 assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS)); 942 (void) pscreen; /* silence non-debug build warnings */ 943 if (!ctx->Const.NativeIntegers) { 944 struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg); 945 ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]); 946 t->systemValues[i] = ureg_scalar(ureg_src(temp), 0); 947 } 948 } 949 950 if (procType == PIPE_SHADER_FRAGMENT && 951 semName == TGSI_SEMANTIC_POSITION) 952 emit_wpos(st_context(ctx), t, program, ureg); 953 954 sysInputs &= ~(1 << i); 955 } 956 } 957 } 958 959 if (program->arb.IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) { 960 /* If temps are accessed with indirect addressing, declare temporaries 961 * in sequential order. Else, we declare them on demand elsewhere. 962 */ 963 for (i = 0; i < program->arb.NumTemporaries; i++) { 964 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 965 t->temps[i] = ureg_DECL_temporary( t->ureg ); 966 } 967 } 968 969 /* Emit constants and immediates. Mesa uses a single index space 970 * for these, so we put all the translated regs in t->constants. 971 */ 972 if (program->Parameters) { 973 t->constants = calloc( program->Parameters->NumParameters, 974 sizeof t->constants[0] ); 975 if (t->constants == NULL) { 976 ret = PIPE_ERROR_OUT_OF_MEMORY; 977 goto out; 978 } 979 980 for (i = 0; i < program->Parameters->NumParameters; i++) { 981 switch (program->Parameters->Parameters[i].Type) { 982 case PROGRAM_STATE_VAR: 983 case PROGRAM_UNIFORM: 984 t->constants[i] = ureg_DECL_constant( ureg, i ); 985 break; 986 987 /* Emit immediates only when there's no indirect addressing of 988 * the const buffer. 989 * FIXME: Be smarter and recognize param arrays: 990 * indirect addressing is only valid within the referenced 991 * array. 992 */ 993 case PROGRAM_CONSTANT: 994 if (program->arb.IndirectRegisterFiles & PROGRAM_ANY_CONST) 995 t->constants[i] = ureg_DECL_constant( ureg, i ); 996 else 997 t->constants[i] = 998 ureg_DECL_immediate( ureg, 999 (const float*) program->Parameters->ParameterValues[i], 1000 4 ); 1001 break; 1002 default: 1003 break; 1004 } 1005 } 1006 } 1007 1008 /* texture samplers */ 1009 for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { 1010 if (program->SamplersUsed & (1u << i)) { 1011 unsigned target = 1012 translate_texture_index(program->TexturesUsed[i], 1013 !!(program->ShadowSamplers & (1 << i))); 1014 t->samplers[i] = ureg_DECL_sampler( ureg, i ); 1015 ureg_DECL_sampler_view(ureg, i, target, 1016 TGSI_RETURN_TYPE_FLOAT, 1017 TGSI_RETURN_TYPE_FLOAT, 1018 TGSI_RETURN_TYPE_FLOAT, 1019 TGSI_RETURN_TYPE_FLOAT); 1020 1021 } 1022 } 1023 1024 /* Emit each instruction in turn: 1025 */ 1026 for (i = 0; i < program->arb.NumInstructions; i++) 1027 compile_instruction(ctx, t, &program->arb.Instructions[i]); 1028 1029 out: 1030 free(t->constants); 1031 return ret; 1032 } 1033