1 /************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29 #include <stdarg.h> 30 31 #include "i915_reg.h" 32 #include "i915_context.h" 33 #include "i915_fpc.h" 34 #include "i915_debug_private.h" 35 36 #include "pipe/p_shader_tokens.h" 37 #include "util/u_math.h" 38 #include "util/u_memory.h" 39 #include "util/u_string.h" 40 #include "tgsi/tgsi_parse.h" 41 #include "tgsi/tgsi_dump.h" 42 43 #include "draw/draw_vertex.h" 44 45 #ifndef M_PI 46 #define M_PI 3.14159265358979323846 47 #endif 48 49 /** 50 * Simple pass-through fragment shader to use when we don't have 51 * a real shader (or it fails to compile for some reason). 52 */ 53 static unsigned passthrough_decl[] = 54 { 55 _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1), 56 57 /* declare input color: 58 */ 59 (D0_DCL | 60 (REG_TYPE_T << D0_TYPE_SHIFT) | 61 (T_DIFFUSE << D0_NR_SHIFT) | 62 D0_CHANNEL_ALL), 63 0, 64 0, 65 }; 66 67 static unsigned passthrough_program[] = 68 { 69 /* move to output color: 70 */ 71 (A0_MOV | 72 (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | 73 A0_DEST_CHANNEL_ALL | 74 (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) | 75 (T_DIFFUSE << A0_SRC0_NR_SHIFT)), 76 0x01230000, /* .xyzw */ 77 0 78 }; 79 80 /* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */ 81 static const float sin_constants[4] = { 2.0 * M_PI, 82 -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1), 83 32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1), 84 -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1) 85 }; 86 87 /* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */ 88 static const float cos_constants[4] = { 1.0, 89 -4.0f * M_PI * M_PI / (2 * 1), 90 16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1), 91 -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1) 92 }; 93 94 95 96 /** 97 * component-wise negation of ureg 98 */ 99 static inline int 100 negate(int reg, int x, int y, int z, int w) 101 { 102 /* Another neat thing about the UREG representation */ 103 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | 104 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | 105 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | 106 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); 107 } 108 109 110 /** 111 * In the event of a translation failure, we'll generate a simple color 112 * pass-through program. 113 */ 114 static void 115 i915_use_passthrough_shader(struct i915_fragment_shader *fs) 116 { 117 fs->program = (uint *) MALLOC(sizeof(passthrough_program)); 118 fs->decl = (uint *) MALLOC(sizeof(passthrough_decl)); 119 if (fs->program) { 120 memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); 121 memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl)); 122 fs->program_len = ARRAY_SIZE(passthrough_program); 123 fs->decl_len = ARRAY_SIZE(passthrough_decl); 124 } 125 fs->num_constants = 0; 126 } 127 128 129 void 130 i915_program_error(struct i915_fp_compile *p, const char *msg, ...) 131 { 132 va_list args; 133 char buffer[1024]; 134 135 debug_printf("i915_program_error: "); 136 va_start( args, msg ); 137 util_vsnprintf( buffer, sizeof(buffer), msg, args ); 138 va_end( args ); 139 debug_printf("%s", buffer); 140 debug_printf("\n"); 141 142 p->error = 1; 143 } 144 145 static uint get_mapping(struct i915_fragment_shader* fs, int unit) 146 { 147 int i; 148 for (i = 0; i < I915_TEX_UNITS; i++) 149 { 150 if (fs->generic_mapping[i] == -1) { 151 fs->generic_mapping[i] = unit; 152 return i; 153 } 154 if (fs->generic_mapping[i] == unit) 155 return i; 156 } 157 debug_printf("Exceeded max generics\n"); 158 return 0; 159 } 160 161 /** 162 * Construct a ureg for the given source register. Will emit 163 * constants, apply swizzling and negation as needed. 164 */ 165 static uint 166 src_vector(struct i915_fp_compile *p, 167 const struct i915_full_src_register *source, 168 struct i915_fragment_shader *fs) 169 { 170 uint index = source->Register.Index; 171 uint src = 0, sem_name, sem_ind; 172 173 switch (source->Register.File) { 174 case TGSI_FILE_TEMPORARY: 175 if (source->Register.Index >= I915_MAX_TEMPORARY) { 176 i915_program_error(p, "Exceeded max temporary reg"); 177 return 0; 178 } 179 src = UREG(REG_TYPE_R, index); 180 break; 181 case TGSI_FILE_INPUT: 182 /* XXX: Packing COL1, FOGC into a single attribute works for 183 * texenv programs, but will fail for real fragment programs 184 * that use these attributes and expect them to be a full 4 185 * components wide. Could use a texcoord to pass these 186 * attributes if necessary, but that won't work in the general 187 * case. 188 * 189 * We also use a texture coordinate to pass wpos when possible. 190 */ 191 192 sem_name = p->shader->info.input_semantic_name[index]; 193 sem_ind = p->shader->info.input_semantic_index[index]; 194 195 switch (sem_name) { 196 case TGSI_SEMANTIC_POSITION: 197 { 198 /* for fragcoord */ 199 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS); 200 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 201 break; 202 } 203 case TGSI_SEMANTIC_COLOR: 204 if (sem_ind == 0) { 205 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 206 } 207 else { 208 /* secondary color */ 209 assert(sem_ind == 1); 210 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 211 src = swizzle(src, X, Y, Z, ONE); 212 } 213 break; 214 case TGSI_SEMANTIC_FOG: 215 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 216 src = swizzle(src, W, W, W, W); 217 break; 218 case TGSI_SEMANTIC_GENERIC: 219 { 220 int real_tex_unit = get_mapping(fs, sem_ind); 221 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL); 222 break; 223 } 224 case TGSI_SEMANTIC_FACE: 225 { 226 /* for back/front faces */ 227 int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE); 228 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); 229 break; 230 } 231 default: 232 i915_program_error(p, "Bad source->Index"); 233 return 0; 234 } 235 break; 236 237 case TGSI_FILE_IMMEDIATE: 238 assert(index < p->num_immediates); 239 index = p->immediates_map[index]; 240 /* fall-through */ 241 case TGSI_FILE_CONSTANT: 242 src = UREG(REG_TYPE_CONST, index); 243 break; 244 245 default: 246 i915_program_error(p, "Bad source->File"); 247 return 0; 248 } 249 250 src = swizzle(src, 251 source->Register.SwizzleX, 252 source->Register.SwizzleY, 253 source->Register.SwizzleZ, 254 source->Register.SwizzleW); 255 256 /* There's both negate-all-components and per-component negation. 257 * Try to handle both here. 258 */ 259 { 260 int n = source->Register.Negate; 261 src = negate(src, n, n, n, n); 262 } 263 264 /* no abs() */ 265 #if 0 266 /* XXX assertions disabled to allow arbfplight.c to run */ 267 /* XXX enable these assertions, or fix things */ 268 assert(!source->Register.Absolute); 269 #endif 270 if (source->Register.Absolute) 271 debug_printf("Unhandled absolute value\n"); 272 273 return src; 274 } 275 276 277 /** 278 * Construct a ureg for a destination register. 279 */ 280 static uint 281 get_result_vector(struct i915_fp_compile *p, 282 const struct i915_full_dst_register *dest) 283 { 284 switch (dest->Register.File) { 285 case TGSI_FILE_OUTPUT: 286 { 287 uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index]; 288 switch (sem_name) { 289 case TGSI_SEMANTIC_POSITION: 290 return UREG(REG_TYPE_OD, 0); 291 case TGSI_SEMANTIC_COLOR: 292 return UREG(REG_TYPE_OC, 0); 293 default: 294 i915_program_error(p, "Bad inst->DstReg.Index/semantics"); 295 return 0; 296 } 297 } 298 case TGSI_FILE_TEMPORARY: 299 return UREG(REG_TYPE_R, dest->Register.Index); 300 default: 301 i915_program_error(p, "Bad inst->DstReg.File"); 302 return 0; 303 } 304 } 305 306 307 /** 308 * Compute flags for saturation and writemask. 309 */ 310 static uint 311 get_result_flags(const struct i915_full_instruction *inst) 312 { 313 const uint writeMask 314 = inst->Dst[0].Register.WriteMask; 315 uint flags = 0x0; 316 317 if (inst->Instruction.Saturate) 318 flags |= A0_DEST_SATURATE; 319 320 if (writeMask & TGSI_WRITEMASK_X) 321 flags |= A0_DEST_CHANNEL_X; 322 if (writeMask & TGSI_WRITEMASK_Y) 323 flags |= A0_DEST_CHANNEL_Y; 324 if (writeMask & TGSI_WRITEMASK_Z) 325 flags |= A0_DEST_CHANNEL_Z; 326 if (writeMask & TGSI_WRITEMASK_W) 327 flags |= A0_DEST_CHANNEL_W; 328 329 return flags; 330 } 331 332 333 /** 334 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token 335 */ 336 static uint 337 translate_tex_src_target(struct i915_fp_compile *p, uint tex) 338 { 339 switch (tex) { 340 case TGSI_TEXTURE_SHADOW1D: 341 /* fall-through */ 342 case TGSI_TEXTURE_1D: 343 return D0_SAMPLE_TYPE_2D; 344 345 case TGSI_TEXTURE_SHADOW2D: 346 /* fall-through */ 347 case TGSI_TEXTURE_2D: 348 return D0_SAMPLE_TYPE_2D; 349 350 case TGSI_TEXTURE_SHADOWRECT: 351 /* fall-through */ 352 case TGSI_TEXTURE_RECT: 353 return D0_SAMPLE_TYPE_2D; 354 355 case TGSI_TEXTURE_3D: 356 return D0_SAMPLE_TYPE_VOLUME; 357 358 case TGSI_TEXTURE_CUBE: 359 return D0_SAMPLE_TYPE_CUBE; 360 361 default: 362 i915_program_error(p, "TexSrc type"); 363 return 0; 364 } 365 } 366 367 /** 368 * Return the number of coords needed to access a given TGSI_TEXTURE_* 369 */ 370 uint 371 i915_num_coords(uint tex) 372 { 373 switch (tex) { 374 case TGSI_TEXTURE_SHADOW1D: 375 case TGSI_TEXTURE_1D: 376 return 1; 377 378 case TGSI_TEXTURE_SHADOW2D: 379 case TGSI_TEXTURE_2D: 380 case TGSI_TEXTURE_SHADOWRECT: 381 case TGSI_TEXTURE_RECT: 382 return 2; 383 384 case TGSI_TEXTURE_3D: 385 case TGSI_TEXTURE_CUBE: 386 return 3; 387 388 default: 389 debug_printf("Unknown texture target for num coords"); 390 return 2; 391 } 392 } 393 394 395 /** 396 * Generate texel lookup instruction. 397 */ 398 static void 399 emit_tex(struct i915_fp_compile *p, 400 const struct i915_full_instruction *inst, 401 uint opcode, 402 struct i915_fragment_shader* fs) 403 { 404 uint texture = inst->Texture.Texture; 405 uint unit = inst->Src[1].Register.Index; 406 uint tex = translate_tex_src_target( p, texture ); 407 uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); 408 uint coord = src_vector( p, &inst->Src[0], fs); 409 410 i915_emit_texld( p, 411 get_result_vector( p, &inst->Dst[0] ), 412 get_result_flags( inst ), 413 sampler, 414 coord, 415 opcode, 416 i915_num_coords(texture) ); 417 } 418 419 420 /** 421 * Generate a simple arithmetic instruction 422 * \param opcode the i915 opcode 423 * \param numArgs the number of input/src arguments 424 */ 425 static void 426 emit_simple_arith(struct i915_fp_compile *p, 427 const struct i915_full_instruction *inst, 428 uint opcode, uint numArgs, 429 struct i915_fragment_shader *fs) 430 { 431 uint arg1, arg2, arg3; 432 433 assert(numArgs <= 3); 434 435 arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs ); 436 arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs ); 437 arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs ); 438 439 i915_emit_arith( p, 440 opcode, 441 get_result_vector( p, &inst->Dst[0]), 442 get_result_flags( inst ), 0, 443 arg1, 444 arg2, 445 arg3 ); 446 } 447 448 449 /** As above, but swap the first two src regs */ 450 static void 451 emit_simple_arith_swap2(struct i915_fp_compile *p, 452 const struct i915_full_instruction *inst, 453 uint opcode, uint numArgs, 454 struct i915_fragment_shader *fs) 455 { 456 struct i915_full_instruction inst2; 457 458 assert(numArgs == 2); 459 460 /* transpose first two registers */ 461 inst2 = *inst; 462 inst2.Src[0] = inst->Src[1]; 463 inst2.Src[1] = inst->Src[0]; 464 465 emit_simple_arith(p, &inst2, opcode, numArgs, fs); 466 } 467 468 /* 469 * Translate TGSI instruction to i915 instruction. 470 * 471 * Possible concerns: 472 * 473 * DDX, DDY -- return 0 474 * SIN, COS -- could use another taylor step? 475 * LIT -- results seem a little different to sw mesa 476 * LOG -- different to mesa on negative numbers, but this is conformant. 477 */ 478 static void 479 i915_translate_instruction(struct i915_fp_compile *p, 480 const struct i915_full_instruction *inst, 481 struct i915_fragment_shader *fs) 482 { 483 uint src0, src1, src2, flags; 484 uint tmp = 0; 485 486 switch (inst->Instruction.Opcode) { 487 case TGSI_OPCODE_ADD: 488 emit_simple_arith(p, inst, A0_ADD, 2, fs); 489 break; 490 491 case TGSI_OPCODE_CEIL: 492 src0 = src_vector(p, &inst->Src[0], fs); 493 tmp = i915_get_utemp(p); 494 flags = get_result_flags(inst); 495 i915_emit_arith(p, 496 A0_FLR, 497 tmp, 498 flags & A0_DEST_CHANNEL_ALL, 0, 499 negate(src0, 1, 1, 1, 1), 0, 0); 500 i915_emit_arith(p, 501 A0_MOV, 502 get_result_vector(p, &inst->Dst[0]), 503 flags, 0, 504 negate(tmp, 1, 1, 1, 1), 0, 0); 505 break; 506 507 case TGSI_OPCODE_CMP: 508 src0 = src_vector(p, &inst->Src[0], fs); 509 src1 = src_vector(p, &inst->Src[1], fs); 510 src2 = src_vector(p, &inst->Src[2], fs); 511 i915_emit_arith(p, A0_CMP, 512 get_result_vector(p, &inst->Dst[0]), 513 get_result_flags(inst), 514 0, src0, src2, src1); /* NOTE: order of src2, src1 */ 515 break; 516 517 case TGSI_OPCODE_COS: 518 src0 = src_vector(p, &inst->Src[0], fs); 519 tmp = i915_get_utemp(p); 520 521 i915_emit_arith(p, 522 A0_MUL, 523 tmp, A0_DEST_CHANNEL_X, 0, 524 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 525 526 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 527 528 /* 529 * t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1 530 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1 531 * t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 532 * result = DP4 t0, cos_constants 533 */ 534 i915_emit_arith(p, 535 A0_MUL, 536 tmp, A0_DEST_CHANNEL_XY, 0, 537 swizzle(tmp, X, X, ONE, ONE), 538 swizzle(tmp, X, ONE, ONE, ONE), 0); 539 540 i915_emit_arith(p, 541 A0_MUL, 542 tmp, A0_DEST_CHANNEL_XYZ, 0, 543 swizzle(tmp, X, Y, X, ONE), 544 swizzle(tmp, X, X, ONE, ONE), 0); 545 546 i915_emit_arith(p, 547 A0_MUL, 548 tmp, A0_DEST_CHANNEL_XYZ, 0, 549 swizzle(tmp, X, X, Z, ONE), 550 swizzle(tmp, Z, ONE, ONE, ONE), 0); 551 552 i915_emit_arith(p, 553 A0_DP4, 554 get_result_vector(p, &inst->Dst[0]), 555 get_result_flags(inst), 0, 556 swizzle(tmp, ONE, Z, Y, X), 557 i915_emit_const4fv(p, cos_constants), 0); 558 break; 559 560 case TGSI_OPCODE_DDX: 561 case TGSI_OPCODE_DDY: 562 /* XXX We just output 0 here */ 563 debug_printf("Punting DDX/DDY\n"); 564 src0 = get_result_vector(p, &inst->Dst[0]); 565 i915_emit_arith(p, 566 A0_MOV, 567 get_result_vector(p, &inst->Dst[0]), 568 get_result_flags(inst), 0, 569 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); 570 break; 571 572 case TGSI_OPCODE_DP2: 573 src0 = src_vector(p, &inst->Src[0], fs); 574 src1 = src_vector(p, &inst->Src[1], fs); 575 576 i915_emit_arith(p, 577 A0_DP3, 578 get_result_vector(p, &inst->Dst[0]), 579 get_result_flags(inst), 0, 580 swizzle(src0, X, Y, ZERO, ZERO), src1, 0); 581 break; 582 583 case TGSI_OPCODE_DP3: 584 emit_simple_arith(p, inst, A0_DP3, 2, fs); 585 break; 586 587 case TGSI_OPCODE_DP4: 588 emit_simple_arith(p, inst, A0_DP4, 2, fs); 589 break; 590 591 case TGSI_OPCODE_DST: 592 src0 = src_vector(p, &inst->Src[0], fs); 593 src1 = src_vector(p, &inst->Src[1], fs); 594 595 /* result[0] = 1 * 1; 596 * result[1] = a[1] * b[1]; 597 * result[2] = a[2] * 1; 598 * result[3] = 1 * b[3]; 599 */ 600 i915_emit_arith(p, 601 A0_MUL, 602 get_result_vector(p, &inst->Dst[0]), 603 get_result_flags(inst), 0, 604 swizzle(src0, ONE, Y, Z, ONE), 605 swizzle(src1, ONE, Y, ONE, W), 0); 606 break; 607 608 case TGSI_OPCODE_END: 609 /* no-op */ 610 break; 611 612 case TGSI_OPCODE_EX2: 613 src0 = src_vector(p, &inst->Src[0], fs); 614 615 i915_emit_arith(p, 616 A0_EXP, 617 get_result_vector(p, &inst->Dst[0]), 618 get_result_flags(inst), 0, 619 swizzle(src0, X, X, X, X), 0, 0); 620 break; 621 622 case TGSI_OPCODE_FLR: 623 emit_simple_arith(p, inst, A0_FLR, 1, fs); 624 break; 625 626 case TGSI_OPCODE_FRC: 627 emit_simple_arith(p, inst, A0_FRC, 1, fs); 628 break; 629 630 case TGSI_OPCODE_KILL_IF: 631 /* kill if src[0].x < 0 || src[0].y < 0 ... */ 632 src0 = src_vector(p, &inst->Src[0], fs); 633 tmp = i915_get_utemp(p); 634 635 i915_emit_texld(p, 636 tmp, /* dest reg: a dummy reg */ 637 A0_DEST_CHANNEL_ALL, /* dest writemask */ 638 0, /* sampler */ 639 src0, /* coord*/ 640 T0_TEXKILL, /* opcode */ 641 1); /* num_coord */ 642 break; 643 644 case TGSI_OPCODE_KILL: 645 /* unconditional kill */ 646 tmp = i915_get_utemp(p); 647 648 i915_emit_texld(p, 649 tmp, /* dest reg: a dummy reg */ 650 A0_DEST_CHANNEL_ALL, /* dest writemask */ 651 0, /* sampler */ 652 negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */ 653 T0_TEXKILL, /* opcode */ 654 1); /* num_coord */ 655 break; 656 657 case TGSI_OPCODE_LG2: 658 src0 = src_vector(p, &inst->Src[0], fs); 659 660 i915_emit_arith(p, 661 A0_LOG, 662 get_result_vector(p, &inst->Dst[0]), 663 get_result_flags(inst), 0, 664 swizzle(src0, X, X, X, X), 0, 0); 665 break; 666 667 case TGSI_OPCODE_LIT: 668 src0 = src_vector(p, &inst->Src[0], fs); 669 tmp = i915_get_utemp(p); 670 671 /* tmp = max( a.xyzw, a.00zw ) 672 * XXX: Clamp tmp.w to -128..128 673 * tmp.y = log(tmp.y) 674 * tmp.y = tmp.w * tmp.y 675 * tmp.y = exp(tmp.y) 676 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 677 */ 678 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 679 src0, swizzle(src0, ZERO, ZERO, Z, W), 0); 680 681 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 682 swizzle(tmp, Y, Y, Y, Y), 0, 0); 683 684 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 685 swizzle(tmp, ZERO, Y, ZERO, ZERO), 686 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 687 688 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 689 swizzle(tmp, Y, Y, Y, Y), 0, 0); 690 691 i915_emit_arith(p, A0_CMP, 692 get_result_vector(p, &inst->Dst[0]), 693 get_result_flags(inst), 0, 694 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 695 swizzle(tmp, ONE, X, ZERO, ONE), 696 swizzle(tmp, ONE, X, Y, ONE)); 697 698 break; 699 700 case TGSI_OPCODE_LRP: 701 src0 = src_vector(p, &inst->Src[0], fs); 702 src1 = src_vector(p, &inst->Src[1], fs); 703 src2 = src_vector(p, &inst->Src[2], fs); 704 flags = get_result_flags(inst); 705 tmp = i915_get_utemp(p); 706 707 /* b*a + c*(1-a) 708 * 709 * b*a + c - ca 710 * 711 * tmp = b*a + c, 712 * result = (-c)*a + tmp 713 */ 714 i915_emit_arith(p, A0_MAD, tmp, 715 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); 716 717 i915_emit_arith(p, A0_MAD, 718 get_result_vector(p, &inst->Dst[0]), 719 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); 720 break; 721 722 case TGSI_OPCODE_MAD: 723 emit_simple_arith(p, inst, A0_MAD, 3, fs); 724 break; 725 726 case TGSI_OPCODE_MAX: 727 emit_simple_arith(p, inst, A0_MAX, 2, fs); 728 break; 729 730 case TGSI_OPCODE_MIN: 731 emit_simple_arith(p, inst, A0_MIN, 2, fs); 732 break; 733 734 case TGSI_OPCODE_MOV: 735 emit_simple_arith(p, inst, A0_MOV, 1, fs); 736 break; 737 738 case TGSI_OPCODE_MUL: 739 emit_simple_arith(p, inst, A0_MUL, 2, fs); 740 break; 741 742 case TGSI_OPCODE_NOP: 743 break; 744 745 case TGSI_OPCODE_POW: 746 src0 = src_vector(p, &inst->Src[0], fs); 747 src1 = src_vector(p, &inst->Src[1], fs); 748 tmp = i915_get_utemp(p); 749 flags = get_result_flags(inst); 750 751 /* XXX: masking on intermediate values, here and elsewhere. 752 */ 753 i915_emit_arith(p, 754 A0_LOG, 755 tmp, A0_DEST_CHANNEL_X, 0, 756 swizzle(src0, X, X, X, X), 0, 0); 757 758 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 759 760 i915_emit_arith(p, 761 A0_EXP, 762 get_result_vector(p, &inst->Dst[0]), 763 flags, 0, swizzle(tmp, X, X, X, X), 0, 0); 764 break; 765 766 case TGSI_OPCODE_RET: 767 /* XXX: no-op? */ 768 break; 769 770 case TGSI_OPCODE_RCP: 771 src0 = src_vector(p, &inst->Src[0], fs); 772 773 i915_emit_arith(p, 774 A0_RCP, 775 get_result_vector(p, &inst->Dst[0]), 776 get_result_flags(inst), 0, 777 swizzle(src0, X, X, X, X), 0, 0); 778 break; 779 780 case TGSI_OPCODE_RSQ: 781 src0 = src_vector(p, &inst->Src[0], fs); 782 783 i915_emit_arith(p, 784 A0_RSQ, 785 get_result_vector(p, &inst->Dst[0]), 786 get_result_flags(inst), 0, 787 swizzle(src0, X, X, X, X), 0, 0); 788 break; 789 790 case TGSI_OPCODE_SEQ: 791 /* if we're both >= and <= then we're == */ 792 src0 = src_vector(p, &inst->Src[0], fs); 793 src1 = src_vector(p, &inst->Src[1], fs); 794 tmp = i915_get_utemp(p); 795 796 i915_emit_arith(p, 797 A0_SGE, 798 tmp, A0_DEST_CHANNEL_ALL, 0, 799 src0, 800 src1, 0); 801 802 i915_emit_arith(p, 803 A0_SGE, 804 get_result_vector(p, &inst->Dst[0]), 805 A0_DEST_CHANNEL_ALL, 0, 806 src1, 807 src0, 0); 808 809 i915_emit_arith(p, 810 A0_MUL, 811 get_result_vector(p, &inst->Dst[0]), 812 A0_DEST_CHANNEL_ALL, 0, 813 get_result_vector(p, &inst->Dst[0]), 814 tmp, 0); 815 816 break; 817 818 case TGSI_OPCODE_SGE: 819 emit_simple_arith(p, inst, A0_SGE, 2, fs); 820 break; 821 822 case TGSI_OPCODE_SIN: 823 src0 = src_vector(p, &inst->Src[0], fs); 824 tmp = i915_get_utemp(p); 825 826 i915_emit_arith(p, 827 A0_MUL, 828 tmp, A0_DEST_CHANNEL_X, 0, 829 src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0); 830 831 i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 832 833 /* 834 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 835 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 836 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 837 * result = DP4 t1.wzyx, sin_constants 838 */ 839 i915_emit_arith(p, 840 A0_MUL, 841 tmp, A0_DEST_CHANNEL_XY, 0, 842 swizzle(tmp, X, X, ONE, ONE), 843 swizzle(tmp, X, ONE, ONE, ONE), 0); 844 845 i915_emit_arith(p, 846 A0_MUL, 847 tmp, A0_DEST_CHANNEL_ALL, 0, 848 swizzle(tmp, X, Y, X, Y), 849 swizzle(tmp, X, X, ONE, ONE), 0); 850 851 i915_emit_arith(p, 852 A0_MUL, 853 tmp, A0_DEST_CHANNEL_ALL, 0, 854 swizzle(tmp, X, Y, Y, W), 855 swizzle(tmp, X, Z, ONE, ONE), 0); 856 857 i915_emit_arith(p, 858 A0_DP4, 859 get_result_vector(p, &inst->Dst[0]), 860 get_result_flags(inst), 0, 861 swizzle(tmp, W, Z, Y, X), 862 i915_emit_const4fv(p, sin_constants), 0); 863 break; 864 865 case TGSI_OPCODE_SLE: 866 /* like SGE, but swap reg0, reg1 */ 867 emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); 868 break; 869 870 case TGSI_OPCODE_SLT: 871 emit_simple_arith(p, inst, A0_SLT, 2, fs); 872 break; 873 874 case TGSI_OPCODE_SGT: 875 /* like SLT, but swap reg0, reg1 */ 876 emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); 877 break; 878 879 case TGSI_OPCODE_SNE: 880 /* if we're < or > then we're != */ 881 src0 = src_vector(p, &inst->Src[0], fs); 882 src1 = src_vector(p, &inst->Src[1], fs); 883 tmp = i915_get_utemp(p); 884 885 i915_emit_arith(p, 886 A0_SLT, 887 tmp, 888 A0_DEST_CHANNEL_ALL, 0, 889 src0, 890 src1, 0); 891 892 i915_emit_arith(p, 893 A0_SLT, 894 get_result_vector(p, &inst->Dst[0]), 895 A0_DEST_CHANNEL_ALL, 0, 896 src1, 897 src0, 0); 898 899 i915_emit_arith(p, 900 A0_ADD, 901 get_result_vector(p, &inst->Dst[0]), 902 A0_DEST_CHANNEL_ALL, 0, 903 get_result_vector(p, &inst->Dst[0]), 904 tmp, 0); 905 break; 906 907 case TGSI_OPCODE_SSG: 908 /* compute (src>0) - (src<0) */ 909 src0 = src_vector(p, &inst->Src[0], fs); 910 tmp = i915_get_utemp(p); 911 912 i915_emit_arith(p, 913 A0_SLT, 914 tmp, 915 A0_DEST_CHANNEL_ALL, 0, 916 src0, 917 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); 918 919 i915_emit_arith(p, 920 A0_SLT, 921 get_result_vector(p, &inst->Dst[0]), 922 A0_DEST_CHANNEL_ALL, 0, 923 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 924 src0, 0); 925 926 i915_emit_arith(p, 927 A0_ADD, 928 get_result_vector(p, &inst->Dst[0]), 929 A0_DEST_CHANNEL_ALL, 0, 930 get_result_vector(p, &inst->Dst[0]), 931 negate(tmp, 1, 1, 1, 1), 0); 932 break; 933 934 case TGSI_OPCODE_TEX: 935 emit_tex(p, inst, T0_TEXLD, fs); 936 break; 937 938 case TGSI_OPCODE_TRUNC: 939 emit_simple_arith(p, inst, A0_TRC, 1, fs); 940 break; 941 942 case TGSI_OPCODE_TXB: 943 emit_tex(p, inst, T0_TEXLDB, fs); 944 break; 945 946 case TGSI_OPCODE_TXP: 947 emit_tex(p, inst, T0_TEXLDP, fs); 948 break; 949 950 default: 951 i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode); 952 p->error = 1; 953 return; 954 } 955 956 i915_release_utemps(p); 957 } 958 959 960 static void i915_translate_token(struct i915_fp_compile *p, 961 const union i915_full_token *token, 962 struct i915_fragment_shader *fs) 963 { 964 struct i915_fragment_shader *ifs = p->shader; 965 switch( token->Token.Type ) { 966 case TGSI_TOKEN_TYPE_PROPERTY: 967 /* 968 * We only support one cbuf, but we still need to ignore the property 969 * correctly so we don't hit the assert at the end of the switch case. 970 */ 971 assert(token->FullProperty.Property.PropertyName == 972 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS); 973 break; 974 975 case TGSI_TOKEN_TYPE_DECLARATION: 976 if (token->FullDeclaration.Declaration.File 977 == TGSI_FILE_CONSTANT) { 978 uint i; 979 for (i = token->FullDeclaration.Range.First; 980 i <= MIN2(token->FullDeclaration.Range.Last, I915_MAX_CONSTANT - 1); 981 i++) { 982 assert(ifs->constant_flags[i] == 0x0); 983 ifs->constant_flags[i] = I915_CONSTFLAG_USER; 984 ifs->num_constants = MAX2(ifs->num_constants, i + 1); 985 } 986 } 987 else if (token->FullDeclaration.Declaration.File 988 == TGSI_FILE_TEMPORARY) { 989 uint i; 990 for (i = token->FullDeclaration.Range.First; 991 i <= token->FullDeclaration.Range.Last; 992 i++) { 993 if (i >= I915_MAX_TEMPORARY) 994 debug_printf("Too many temps (%d)\n",i); 995 else 996 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ 997 p->temp_flag |= (1 << i); /* mark temp as used */ 998 } 999 } 1000 break; 1001 1002 case TGSI_TOKEN_TYPE_IMMEDIATE: 1003 { 1004 const struct tgsi_full_immediate *imm 1005 = &token->FullImmediate; 1006 const uint pos = p->num_immediates++; 1007 uint j; 1008 assert( imm->Immediate.NrTokens <= 4 + 1 ); 1009 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { 1010 p->immediates[pos][j] = imm->u[j].Float; 1011 } 1012 } 1013 break; 1014 1015 case TGSI_TOKEN_TYPE_INSTRUCTION: 1016 if (p->first_instruction) { 1017 /* resolve location of immediates */ 1018 uint i, j; 1019 for (i = 0; i < p->num_immediates; i++) { 1020 /* find constant slot for this immediate */ 1021 for (j = 0; j < I915_MAX_CONSTANT; j++) { 1022 if (ifs->constant_flags[j] == 0x0) { 1023 memcpy(ifs->constants[j], 1024 p->immediates[i], 1025 4 * sizeof(float)); 1026 /*printf("immediate %d maps to const %d\n", i, j);*/ 1027 ifs->constant_flags[j] = 0xf; /* all four comps used */ 1028 p->immediates_map[i] = j; 1029 ifs->num_constants = MAX2(ifs->num_constants, j + 1); 1030 break; 1031 } 1032 } 1033 } 1034 1035 p->first_instruction = FALSE; 1036 } 1037 1038 i915_translate_instruction(p, &token->FullInstruction, fs); 1039 break; 1040 1041 default: 1042 assert( 0 ); 1043 } 1044 1045 } 1046 1047 /** 1048 * Translate TGSI fragment shader into i915 hardware instructions. 1049 * \param p the translation state 1050 * \param tokens the TGSI token array 1051 */ 1052 static void 1053 i915_translate_instructions(struct i915_fp_compile *p, 1054 const struct i915_token_list *tokens, 1055 struct i915_fragment_shader *fs) 1056 { 1057 int i; 1058 for(i = 0; i<tokens->NumTokens; i++) { 1059 i915_translate_token(p, &tokens->Tokens[i], fs); 1060 } 1061 } 1062 1063 1064 static struct i915_fp_compile * 1065 i915_init_compile(struct i915_context *i915, 1066 struct i915_fragment_shader *ifs) 1067 { 1068 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); 1069 int i; 1070 1071 p->shader = ifs; 1072 1073 /* Put new constants at end of const buffer, growing downward. 1074 * The problem is we don't know how many user-defined constants might 1075 * be specified with pipe->set_constant_buffer(). 1076 * Should pre-scan the user's program to determine the highest-numbered 1077 * constant referenced. 1078 */ 1079 ifs->num_constants = 0; 1080 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); 1081 1082 memset(&p->register_phases, 0, sizeof(p->register_phases)); 1083 1084 for (i = 0; i < I915_TEX_UNITS; i++) 1085 ifs->generic_mapping[i] = -1; 1086 1087 p->first_instruction = TRUE; 1088 1089 p->nr_tex_indirect = 1; /* correct? */ 1090 p->nr_tex_insn = 0; 1091 p->nr_alu_insn = 0; 1092 p->nr_decl_insn = 0; 1093 1094 p->csr = p->program; 1095 p->decl = p->declarations; 1096 p->decl_s = 0; 1097 p->decl_t = 0; 1098 p->temp_flag = ~0x0 << I915_MAX_TEMPORARY; 1099 p->utemp_flag = ~0x7; 1100 1101 /* initialize the first program word */ 1102 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; 1103 1104 return p; 1105 } 1106 1107 1108 /* Copy compile results to the fragment program struct and destroy the 1109 * compilation context. 1110 */ 1111 static void 1112 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) 1113 { 1114 struct i915_fragment_shader *ifs = p->shader; 1115 unsigned long program_size = (unsigned long) (p->csr - p->program); 1116 unsigned long decl_size = (unsigned long) (p->decl - p->declarations); 1117 1118 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) 1119 debug_printf("Exceeded max nr indirect texture lookups\n"); 1120 1121 if (p->nr_tex_insn > I915_MAX_TEX_INSN) 1122 i915_program_error(p, "Exceeded max TEX instructions"); 1123 1124 if (p->nr_alu_insn > I915_MAX_ALU_INSN) 1125 i915_program_error(p, "Exceeded max ALU instructions"); 1126 1127 if (p->nr_decl_insn > I915_MAX_DECL_INSN) 1128 i915_program_error(p, "Exceeded max DECL instructions"); 1129 1130 if (p->error) { 1131 p->NumNativeInstructions = 0; 1132 p->NumNativeAluInstructions = 0; 1133 p->NumNativeTexInstructions = 0; 1134 p->NumNativeTexIndirections = 0; 1135 1136 i915_use_passthrough_shader(ifs); 1137 } 1138 else { 1139 p->NumNativeInstructions 1140 = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; 1141 p->NumNativeAluInstructions = p->nr_alu_insn; 1142 p->NumNativeTexInstructions = p->nr_tex_insn; 1143 p->NumNativeTexIndirections = p->nr_tex_indirect; 1144 1145 /* patch in the program length */ 1146 p->declarations[0] |= program_size + decl_size - 2; 1147 1148 /* Copy compilation results to fragment program struct: 1149 */ 1150 assert(!ifs->decl); 1151 assert(!ifs->program); 1152 1153 ifs->decl 1154 = (uint *) MALLOC(decl_size * sizeof(uint)); 1155 ifs->program 1156 = (uint *) MALLOC(program_size * sizeof(uint)); 1157 1158 if (ifs->decl) { 1159 ifs->decl_len = decl_size; 1160 1161 memcpy(ifs->decl, 1162 p->declarations, 1163 decl_size * sizeof(uint)); 1164 } 1165 1166 if (ifs->program) { 1167 ifs->program_len = program_size; 1168 1169 memcpy(ifs->program, 1170 p->program, 1171 program_size * sizeof(uint)); 1172 } 1173 } 1174 1175 /* Release the compilation struct: 1176 */ 1177 FREE(p); 1178 } 1179 1180 1181 1182 1183 1184 /** 1185 * Rather than trying to intercept and jiggle depth writes during 1186 * emit, just move the value into its correct position at the end of 1187 * the program: 1188 */ 1189 static void 1190 i915_fixup_depth_write(struct i915_fp_compile *p) 1191 { 1192 /* XXX assuming pos/depth is always in output[0] */ 1193 if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) { 1194 const uint depth = UREG(REG_TYPE_OD, 0); 1195 1196 i915_emit_arith(p, 1197 A0_MOV, /* opcode */ 1198 depth, /* dest reg */ 1199 A0_DEST_CHANNEL_W, /* write mask */ 1200 0, /* saturate? */ 1201 swizzle(depth, X, Y, Z, Z), /* src0 */ 1202 0, 0 /* src1, src2 */); 1203 } 1204 } 1205 1206 1207 void 1208 i915_translate_fragment_program( struct i915_context *i915, 1209 struct i915_fragment_shader *fs) 1210 { 1211 struct i915_fp_compile *p; 1212 const struct tgsi_token *tokens = fs->state.tokens; 1213 struct i915_token_list* i_tokens; 1214 1215 #if 0 1216 tgsi_dump(tokens, 0); 1217 #endif 1218 1219 /* hw doesn't seem to like empty frag programs, even when the depth write 1220 * fixup gets emitted below - may that one is fishy, too? */ 1221 if (fs->info.num_instructions == 1) { 1222 i915_use_passthrough_shader(fs); 1223 1224 return; 1225 } 1226 1227 p = i915_init_compile(i915, fs); 1228 1229 i_tokens = i915_optimize(tokens); 1230 i915_translate_instructions(p, i_tokens, fs); 1231 i915_fixup_depth_write(p); 1232 1233 i915_fini_compile(i915, p); 1234 i915_optimize_free(i_tokens); 1235 1236 #if 0 1237 i915_disassemble_program(NULL, fs->program, fs->program_len); 1238 #endif 1239 } 1240