1 /************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 #include "main/glheader.h" 29 #include "main/macros.h" 30 #include "main/enums.h" 31 32 #include "program/prog_instruction.h" 33 #include "program/prog_parameter.h" 34 #include "program/program.h" 35 #include "program/programopt.h" 36 #include "program/prog_print.h" 37 38 #include "tnl/tnl.h" 39 #include "tnl/t_context.h" 40 41 #include "intel_batchbuffer.h" 42 43 #include "i915_reg.h" 44 #include "i915_context.h" 45 #include "i915_program.h" 46 47 static const GLfloat sin_quad_constants[2][4] = { 48 { 49 2.0, 50 -1.0, 51 .5, 52 .75 53 }, 54 { 55 4.0, 56 -4.0, 57 1.0 / (2.0 * M_PI), 58 .2225 59 } 60 }; 61 62 static const GLfloat sin_constants[4] = { 1.0, 63 -1.0 / (3 * 2 * 1), 64 1.0 / (5 * 4 * 3 * 2 * 1), 65 -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1) 66 }; 67 68 /* 1, -1/2!, 1/4!, -1/6! */ 69 static const GLfloat cos_constants[4] = { 1.0, 70 -1.0 / (2 * 1), 71 1.0 / (4 * 3 * 2 * 1), 72 -1.0 / (6 * 5 * 4 * 3 * 2 * 1) 73 }; 74 75 /** 76 * Retrieve a ureg for the given source register. Will emit 77 * constants, apply swizzling and negation as needed. 78 */ 79 static GLuint 80 src_vector(struct i915_fragment_program *p, 81 const struct prog_src_register *source, 82 const struct gl_fragment_program *program) 83 { 84 GLuint src; 85 86 switch (source->File) { 87 88 /* Registers: 89 */ 90 case PROGRAM_TEMPORARY: 91 if (source->Index >= I915_MAX_TEMPORARY) { 92 i915_program_error(p, "Exceeded max temporary reg: %d/%d", 93 source->Index, I915_MAX_TEMPORARY); 94 return 0; 95 } 96 src = UREG(REG_TYPE_R, source->Index); 97 break; 98 case PROGRAM_INPUT: 99 switch (source->Index) { 100 case FRAG_ATTRIB_WPOS: 101 src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL); 102 break; 103 case FRAG_ATTRIB_COL0: 104 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 105 break; 106 case FRAG_ATTRIB_COL1: 107 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 108 src = swizzle(src, X, Y, Z, ONE); 109 break; 110 case FRAG_ATTRIB_FOGC: 111 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 112 src = swizzle(src, W, ZERO, ZERO, ONE); 113 break; 114 case FRAG_ATTRIB_TEX0: 115 case FRAG_ATTRIB_TEX1: 116 case FRAG_ATTRIB_TEX2: 117 case FRAG_ATTRIB_TEX3: 118 case FRAG_ATTRIB_TEX4: 119 case FRAG_ATTRIB_TEX5: 120 case FRAG_ATTRIB_TEX6: 121 case FRAG_ATTRIB_TEX7: 122 src = i915_emit_decl(p, REG_TYPE_T, 123 T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0), 124 D0_CHANNEL_ALL); 125 break; 126 127 case FRAG_ATTRIB_VAR0: 128 case FRAG_ATTRIB_VAR0 + 1: 129 case FRAG_ATTRIB_VAR0 + 2: 130 case FRAG_ATTRIB_VAR0 + 3: 131 case FRAG_ATTRIB_VAR0 + 4: 132 case FRAG_ATTRIB_VAR0 + 5: 133 case FRAG_ATTRIB_VAR0 + 6: 134 case FRAG_ATTRIB_VAR0 + 7: 135 src = i915_emit_decl(p, REG_TYPE_T, 136 T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0), 137 D0_CHANNEL_ALL); 138 break; 139 140 default: 141 i915_program_error(p, "Bad source->Index: %d", source->Index); 142 return 0; 143 } 144 break; 145 146 case PROGRAM_OUTPUT: 147 switch (source->Index) { 148 case FRAG_RESULT_COLOR: 149 src = UREG(REG_TYPE_OC, 0); 150 break; 151 case FRAG_RESULT_DEPTH: 152 src = UREG(REG_TYPE_OD, 0); 153 break; 154 default: 155 i915_program_error(p, "Bad source->Index: %d", source->Index); 156 return 0; 157 } 158 break; 159 160 /* Various paramters and env values. All emitted to 161 * hardware as program constants. 162 */ 163 case PROGRAM_LOCAL_PARAM: 164 src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]); 165 break; 166 167 case PROGRAM_ENV_PARAM: 168 src = 169 i915_emit_param4fv(p, 170 p->ctx->FragmentProgram.Parameters[source-> 171 Index]); 172 break; 173 174 case PROGRAM_CONSTANT: 175 case PROGRAM_STATE_VAR: 176 case PROGRAM_NAMED_PARAM: 177 case PROGRAM_UNIFORM: 178 src = i915_emit_param4fv(p, 179 &program->Base.Parameters->ParameterValues[source->Index][0].f); 180 break; 181 182 default: 183 i915_program_error(p, "Bad source->File: %d", source->File); 184 return 0; 185 } 186 187 src = swizzle(src, 188 GET_SWZ(source->Swizzle, 0), 189 GET_SWZ(source->Swizzle, 1), 190 GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3)); 191 192 if (source->Negate) 193 src = negate(src, 194 GET_BIT(source->Negate, 0), 195 GET_BIT(source->Negate, 1), 196 GET_BIT(source->Negate, 2), 197 GET_BIT(source->Negate, 3)); 198 199 return src; 200 } 201 202 203 static GLuint 204 get_result_vector(struct i915_fragment_program *p, 205 const struct prog_instruction *inst) 206 { 207 switch (inst->DstReg.File) { 208 case PROGRAM_OUTPUT: 209 switch (inst->DstReg.Index) { 210 case FRAG_RESULT_COLOR: 211 case FRAG_RESULT_DATA0: 212 return UREG(REG_TYPE_OC, 0); 213 case FRAG_RESULT_DEPTH: 214 p->depth_written = 1; 215 return UREG(REG_TYPE_OD, 0); 216 default: 217 i915_program_error(p, "Bad inst->DstReg.Index: %d", 218 inst->DstReg.Index); 219 return 0; 220 } 221 case PROGRAM_TEMPORARY: 222 return UREG(REG_TYPE_R, inst->DstReg.Index); 223 default: 224 i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File); 225 return 0; 226 } 227 } 228 229 static GLuint 230 get_result_flags(const struct prog_instruction *inst) 231 { 232 GLuint flags = 0; 233 234 if (inst->SaturateMode == SATURATE_ZERO_ONE) 235 flags |= A0_DEST_SATURATE; 236 if (inst->DstReg.WriteMask & WRITEMASK_X) 237 flags |= A0_DEST_CHANNEL_X; 238 if (inst->DstReg.WriteMask & WRITEMASK_Y) 239 flags |= A0_DEST_CHANNEL_Y; 240 if (inst->DstReg.WriteMask & WRITEMASK_Z) 241 flags |= A0_DEST_CHANNEL_Z; 242 if (inst->DstReg.WriteMask & WRITEMASK_W) 243 flags |= A0_DEST_CHANNEL_W; 244 245 return flags; 246 } 247 248 static GLuint 249 translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit) 250 { 251 switch (bit) { 252 case TEXTURE_1D_INDEX: 253 return D0_SAMPLE_TYPE_2D; 254 case TEXTURE_2D_INDEX: 255 return D0_SAMPLE_TYPE_2D; 256 case TEXTURE_RECT_INDEX: 257 return D0_SAMPLE_TYPE_2D; 258 case TEXTURE_3D_INDEX: 259 return D0_SAMPLE_TYPE_VOLUME; 260 case TEXTURE_CUBE_INDEX: 261 return D0_SAMPLE_TYPE_CUBE; 262 default: 263 i915_program_error(p, "TexSrcBit: %d", bit); 264 return 0; 265 } 266 } 267 268 #define EMIT_TEX( OP ) \ 269 do { \ 270 GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget ); \ 271 const struct gl_fragment_program *program = &p->FragProg; \ 272 GLuint unit = program->Base.SamplerUnits[inst->TexSrcUnit]; \ 273 GLuint sampler = i915_emit_decl(p, REG_TYPE_S, \ 274 unit, dim); \ 275 GLuint coord = src_vector( p, &inst->SrcReg[0], program); \ 276 /* Texel lookup */ \ 277 \ 278 i915_emit_texld( p, get_live_regs(p, inst), \ 279 get_result_vector( p, inst ), \ 280 get_result_flags( inst ), \ 281 sampler, \ 282 coord, \ 283 OP); \ 284 } while (0) 285 286 #define EMIT_ARITH( OP, N ) \ 287 do { \ 288 i915_emit_arith( p, \ 289 OP, \ 290 get_result_vector( p, inst ), \ 291 get_result_flags( inst ), 0, \ 292 (N<1)?0:src_vector( p, &inst->SrcReg[0], program), \ 293 (N<2)?0:src_vector( p, &inst->SrcReg[1], program), \ 294 (N<3)?0:src_vector( p, &inst->SrcReg[2], program)); \ 295 } while (0) 296 297 #define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 ) 298 #define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 ) 299 #define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 ) 300 301 /* 302 * TODO: consider moving this into core 303 */ 304 static bool calc_live_regs( struct i915_fragment_program *p ) 305 { 306 const struct gl_fragment_program *program = &p->FragProg; 307 GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1); 308 uint8_t live_components[I915_MAX_TEMPORARY] = { 0, }; 309 GLint i; 310 311 for (i = program->Base.NumInstructions - 1; i >= 0; i--) { 312 struct prog_instruction *inst = &program->Base.Instructions[i]; 313 int opArgs = _mesa_num_inst_src_regs(inst->Opcode); 314 int a; 315 316 /* Register is written to: unmark as live for this and preceeding ops */ 317 if (inst->DstReg.File == PROGRAM_TEMPORARY) { 318 if (inst->DstReg.Index >= I915_MAX_TEMPORARY) 319 return false; 320 321 live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask; 322 if (live_components[inst->DstReg.Index] == 0) 323 regsUsed &= ~(1 << inst->DstReg.Index); 324 } 325 326 for (a = 0; a < opArgs; a++) { 327 /* Register is read from: mark as live for this and preceeding ops */ 328 if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) { 329 unsigned c; 330 331 if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY) 332 return false; 333 334 regsUsed |= 1 << inst->SrcReg[a].Index; 335 336 for (c = 0; c < 4; c++) { 337 const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c); 338 339 if (field <= SWIZZLE_W) 340 live_components[inst->SrcReg[a].Index] |= (1U << field); 341 } 342 } 343 } 344 345 p->usedRegs[i] = regsUsed; 346 } 347 348 return true; 349 } 350 351 static GLuint get_live_regs( struct i915_fragment_program *p, 352 const struct prog_instruction *inst ) 353 { 354 const struct gl_fragment_program *program = &p->FragProg; 355 GLuint nr = inst - program->Base.Instructions; 356 357 return p->usedRegs[nr]; 358 } 359 360 361 /* Possible concerns: 362 * 363 * SIN, COS -- could use another taylor step? 364 * LIT -- results seem a little different to sw mesa 365 * LOG -- different to mesa on negative numbers, but this is conformant. 366 * 367 * Parse failures -- Mesa doesn't currently give a good indication 368 * internally whether a particular program string parsed or not. This 369 * can lead to confusion -- hopefully we cope with it ok now. 370 * 371 */ 372 static void 373 upload_program(struct i915_fragment_program *p) 374 { 375 const struct gl_fragment_program *program = &p->FragProg; 376 const struct prog_instruction *inst = program->Base.Instructions; 377 378 if (INTEL_DEBUG & DEBUG_WM) 379 _mesa_print_program(&program->Base); 380 381 /* Is this a parse-failed program? Ensure a valid program is 382 * loaded, as the flagging of an error isn't sufficient to stop 383 * this being uploaded to hardware. 384 */ 385 if (inst[0].Opcode == OPCODE_END) { 386 GLuint tmp = i915_get_utemp(p); 387 i915_emit_arith(p, 388 A0_MOV, 389 UREG(REG_TYPE_OC, 0), 390 A0_DEST_CHANNEL_ALL, 0, 391 swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0); 392 return; 393 } 394 395 if (program->Base.NumInstructions > I915_MAX_INSN) { 396 i915_program_error(p, "Exceeded max instructions (%d out of %d)", 397 program->Base.NumInstructions, I915_MAX_INSN); 398 return; 399 } 400 401 /* Not always needed: 402 */ 403 if (!calc_live_regs(p)) { 404 i915_program_error(p, "Could not allocate registers"); 405 return; 406 } 407 408 while (1) { 409 GLuint src0, src1, src2, flags; 410 GLuint tmp = 0, dst, consts0 = 0, consts1 = 0; 411 412 switch (inst->Opcode) { 413 case OPCODE_ABS: 414 src0 = src_vector(p, &inst->SrcReg[0], program); 415 i915_emit_arith(p, 416 A0_MAX, 417 get_result_vector(p, inst), 418 get_result_flags(inst), 0, 419 src0, negate(src0, 1, 1, 1, 1), 0); 420 break; 421 422 case OPCODE_ADD: 423 EMIT_2ARG_ARITH(A0_ADD); 424 break; 425 426 case OPCODE_CMP: 427 src0 = src_vector(p, &inst->SrcReg[0], program); 428 src1 = src_vector(p, &inst->SrcReg[1], program); 429 src2 = src_vector(p, &inst->SrcReg[2], program); 430 i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1); /* NOTE: order of src2, src1 */ 431 break; 432 433 case OPCODE_COS: 434 src0 = src_vector(p, &inst->SrcReg[0], program); 435 tmp = i915_get_utemp(p); 436 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); 437 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); 438 439 /* Reduce range from repeating about [-pi,pi] to [-1,1] */ 440 i915_emit_arith(p, 441 A0_MAD, 442 tmp, A0_DEST_CHANNEL_X, 0, 443 src0, 444 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ 445 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */ 446 447 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 448 449 i915_emit_arith(p, 450 A0_MAD, 451 tmp, A0_DEST_CHANNEL_X, 0, 452 tmp, 453 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ 454 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ 455 456 /* Compute COS with the same calculation used for SIN, but a 457 * different source range has been mapped to [-1,1] this time. 458 */ 459 460 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ 461 i915_emit_arith(p, 462 A0_MAX, 463 tmp, A0_DEST_CHANNEL_Y, 0, 464 swizzle(tmp, ZERO, X, ZERO, ZERO), 465 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 466 0); 467 468 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ 469 i915_emit_arith(p, 470 A0_MUL, 471 tmp, A0_DEST_CHANNEL_Y, 0, 472 swizzle(tmp, ZERO, X, ZERO, ZERO), 473 tmp, 474 0); 475 476 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ 477 i915_emit_arith(p, 478 A0_DP3, 479 tmp, A0_DEST_CHANNEL_X, 0, 480 tmp, 481 swizzle(consts1, X, Y, ZERO, ZERO), 482 0); 483 484 /* tmp.x now contains a first approximation (y). Now, weight it 485 * against tmp.y**2 to get closer. 486 */ 487 i915_emit_arith(p, 488 A0_MAX, 489 tmp, A0_DEST_CHANNEL_Y, 0, 490 swizzle(tmp, ZERO, X, ZERO, ZERO), 491 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 492 0); 493 494 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ 495 i915_emit_arith(p, 496 A0_MAD, 497 tmp, A0_DEST_CHANNEL_Y, 0, 498 swizzle(tmp, ZERO, X, ZERO, ZERO), 499 swizzle(tmp, ZERO, Y, ZERO, ZERO), 500 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); 501 502 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ 503 i915_emit_arith(p, 504 A0_MAD, 505 get_result_vector(p, inst), 506 get_result_flags(inst), 0, 507 swizzle(consts1, W, W, W, W), 508 swizzle(tmp, Y, Y, Y, Y), 509 swizzle(tmp, X, X, X, X)); 510 break; 511 512 case OPCODE_DP2: 513 src0 = src_vector(p, &inst->SrcReg[0], program); 514 src1 = src_vector(p, &inst->SrcReg[1], program); 515 i915_emit_arith(p, 516 A0_DP3, 517 get_result_vector(p, inst), 518 get_result_flags(inst), 0, 519 swizzle(src0, X, Y, ZERO, ZERO), 520 swizzle(src1, X, Y, ZERO, ZERO), 521 0); 522 break; 523 524 case OPCODE_DP3: 525 EMIT_2ARG_ARITH(A0_DP3); 526 break; 527 528 case OPCODE_DP4: 529 EMIT_2ARG_ARITH(A0_DP4); 530 break; 531 532 case OPCODE_DPH: 533 src0 = src_vector(p, &inst->SrcReg[0], program); 534 src1 = src_vector(p, &inst->SrcReg[1], program); 535 536 i915_emit_arith(p, 537 A0_DP4, 538 get_result_vector(p, inst), 539 get_result_flags(inst), 0, 540 swizzle(src0, X, Y, Z, ONE), src1, 0); 541 break; 542 543 case OPCODE_DST: 544 src0 = src_vector(p, &inst->SrcReg[0], program); 545 src1 = src_vector(p, &inst->SrcReg[1], program); 546 547 /* result[0] = 1 * 1; 548 * result[1] = a[1] * b[1]; 549 * result[2] = a[2] * 1; 550 * result[3] = 1 * b[3]; 551 */ 552 i915_emit_arith(p, 553 A0_MUL, 554 get_result_vector(p, inst), 555 get_result_flags(inst), 0, 556 swizzle(src0, ONE, Y, Z, ONE), 557 swizzle(src1, ONE, Y, ONE, W), 0); 558 break; 559 560 case OPCODE_EX2: 561 src0 = src_vector(p, &inst->SrcReg[0], program); 562 563 i915_emit_arith(p, 564 A0_EXP, 565 get_result_vector(p, inst), 566 get_result_flags(inst), 0, 567 swizzle(src0, X, X, X, X), 0, 0); 568 break; 569 570 case OPCODE_FLR: 571 EMIT_1ARG_ARITH(A0_FLR); 572 break; 573 574 case OPCODE_TRUNC: 575 EMIT_1ARG_ARITH(A0_TRC); 576 break; 577 578 case OPCODE_FRC: 579 EMIT_1ARG_ARITH(A0_FRC); 580 break; 581 582 case OPCODE_KIL: 583 src0 = src_vector(p, &inst->SrcReg[0], program); 584 tmp = i915_get_utemp(p); 585 586 i915_emit_texld(p, get_live_regs(p, inst), 587 tmp, A0_DEST_CHANNEL_ALL, /* use a dummy dest reg */ 588 0, src0, T0_TEXKILL); 589 break; 590 591 case OPCODE_KIL_NV: 592 if (inst->DstReg.CondMask == COND_TR) { 593 tmp = i915_get_utemp(p); 594 595 /* The KIL instruction discards the fragment if any component of 596 * the source is < 0. Emit an immediate operand of {-1}.xywz. 597 */ 598 i915_emit_texld(p, get_live_regs(p, inst), 599 tmp, A0_DEST_CHANNEL_ALL, 600 0, /* use a dummy dest reg */ 601 negate(swizzle(tmp, ONE, ONE, ONE, ONE), 602 1, 1, 1, 1), 603 T0_TEXKILL); 604 } else { 605 p->error = 1; 606 i915_program_error(p, "Unsupported KIL_NV condition code: %d", 607 inst->DstReg.CondMask); 608 } 609 break; 610 611 case OPCODE_LG2: 612 src0 = src_vector(p, &inst->SrcReg[0], program); 613 614 i915_emit_arith(p, 615 A0_LOG, 616 get_result_vector(p, inst), 617 get_result_flags(inst), 0, 618 swizzle(src0, X, X, X, X), 0, 0); 619 break; 620 621 case OPCODE_LIT: 622 src0 = src_vector(p, &inst->SrcReg[0], program); 623 tmp = i915_get_utemp(p); 624 625 /* tmp = max( a.xyzw, a.00zw ) 626 * XXX: Clamp tmp.w to -128..128 627 * tmp.y = log(tmp.y) 628 * tmp.y = tmp.w * tmp.y 629 * tmp.y = exp(tmp.y) 630 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 631 */ 632 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, 633 src0, swizzle(src0, ZERO, ZERO, Z, W), 0); 634 635 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 636 swizzle(tmp, Y, Y, Y, Y), 0, 0); 637 638 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 639 swizzle(tmp, ZERO, Y, ZERO, ZERO), 640 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 641 642 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 643 swizzle(tmp, Y, Y, Y, Y), 0, 0); 644 645 i915_emit_arith(p, A0_CMP, 646 get_result_vector(p, inst), 647 get_result_flags(inst), 0, 648 negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 649 swizzle(tmp, ONE, X, ZERO, ONE), 650 swizzle(tmp, ONE, X, Y, ONE)); 651 652 break; 653 654 case OPCODE_LRP: 655 src0 = src_vector(p, &inst->SrcReg[0], program); 656 src1 = src_vector(p, &inst->SrcReg[1], program); 657 src2 = src_vector(p, &inst->SrcReg[2], program); 658 flags = get_result_flags(inst); 659 tmp = i915_get_utemp(p); 660 661 /* b*a + c*(1-a) 662 * 663 * b*a + c - ca 664 * 665 * tmp = b*a + c, 666 * result = (-c)*a + tmp 667 */ 668 i915_emit_arith(p, A0_MAD, tmp, 669 flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2); 670 671 i915_emit_arith(p, A0_MAD, 672 get_result_vector(p, inst), 673 flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp); 674 break; 675 676 case OPCODE_MAD: 677 EMIT_3ARG_ARITH(A0_MAD); 678 break; 679 680 case OPCODE_MAX: 681 EMIT_2ARG_ARITH(A0_MAX); 682 break; 683 684 case OPCODE_MIN: 685 src0 = src_vector(p, &inst->SrcReg[0], program); 686 src1 = src_vector(p, &inst->SrcReg[1], program); 687 tmp = i915_get_utemp(p); 688 flags = get_result_flags(inst); 689 690 i915_emit_arith(p, 691 A0_MAX, 692 tmp, flags & A0_DEST_CHANNEL_ALL, 0, 693 negate(src0, 1, 1, 1, 1), 694 negate(src1, 1, 1, 1, 1), 0); 695 696 i915_emit_arith(p, 697 A0_MOV, 698 get_result_vector(p, inst), 699 flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0); 700 break; 701 702 case OPCODE_MOV: 703 EMIT_1ARG_ARITH(A0_MOV); 704 break; 705 706 case OPCODE_MUL: 707 EMIT_2ARG_ARITH(A0_MUL); 708 break; 709 710 case OPCODE_POW: 711 src0 = src_vector(p, &inst->SrcReg[0], program); 712 src1 = src_vector(p, &inst->SrcReg[1], program); 713 tmp = i915_get_utemp(p); 714 flags = get_result_flags(inst); 715 716 /* XXX: masking on intermediate values, here and elsewhere. 717 */ 718 i915_emit_arith(p, 719 A0_LOG, 720 tmp, A0_DEST_CHANNEL_X, 0, 721 swizzle(src0, X, X, X, X), 0, 0); 722 723 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 724 725 726 i915_emit_arith(p, 727 A0_EXP, 728 get_result_vector(p, inst), 729 flags, 0, swizzle(tmp, X, X, X, X), 0, 0); 730 731 break; 732 733 case OPCODE_RCP: 734 src0 = src_vector(p, &inst->SrcReg[0], program); 735 736 i915_emit_arith(p, 737 A0_RCP, 738 get_result_vector(p, inst), 739 get_result_flags(inst), 0, 740 swizzle(src0, X, X, X, X), 0, 0); 741 break; 742 743 case OPCODE_RSQ: 744 745 src0 = src_vector(p, &inst->SrcReg[0], program); 746 747 i915_emit_arith(p, 748 A0_RSQ, 749 get_result_vector(p, inst), 750 get_result_flags(inst), 0, 751 swizzle(src0, X, X, X, X), 0, 0); 752 break; 753 754 case OPCODE_SCS: 755 src0 = src_vector(p, &inst->SrcReg[0], program); 756 tmp = i915_get_utemp(p); 757 758 /* 759 * t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1 760 * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x 761 * t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x 762 * scs.x = DP4 t1, sin_constants 763 * t1 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1 764 * scs.y = DP4 t1, cos_constants 765 */ 766 i915_emit_arith(p, 767 A0_MUL, 768 tmp, A0_DEST_CHANNEL_XY, 0, 769 swizzle(src0, X, X, ONE, ONE), 770 swizzle(src0, X, ONE, ONE, ONE), 0); 771 772 i915_emit_arith(p, 773 A0_MUL, 774 tmp, A0_DEST_CHANNEL_ALL, 0, 775 swizzle(tmp, X, Y, X, Y), 776 swizzle(tmp, X, X, ONE, ONE), 0); 777 778 if (inst->DstReg.WriteMask & WRITEMASK_Y) { 779 GLuint tmp1; 780 781 if (inst->DstReg.WriteMask & WRITEMASK_X) 782 tmp1 = i915_get_utemp(p); 783 else 784 tmp1 = tmp; 785 786 i915_emit_arith(p, 787 A0_MUL, 788 tmp1, A0_DEST_CHANNEL_ALL, 0, 789 swizzle(tmp, X, Y, Y, W), 790 swizzle(tmp, X, Z, ONE, ONE), 0); 791 792 i915_emit_arith(p, 793 A0_DP4, 794 get_result_vector(p, inst), 795 A0_DEST_CHANNEL_Y, 0, 796 swizzle(tmp1, W, Z, Y, X), 797 i915_emit_const4fv(p, sin_constants), 0); 798 } 799 800 if (inst->DstReg.WriteMask & WRITEMASK_X) { 801 i915_emit_arith(p, 802 A0_MUL, 803 tmp, A0_DEST_CHANNEL_XYZ, 0, 804 swizzle(tmp, X, X, Z, ONE), 805 swizzle(tmp, Z, ONE, ONE, ONE), 0); 806 807 i915_emit_arith(p, 808 A0_DP4, 809 get_result_vector(p, inst), 810 A0_DEST_CHANNEL_X, 0, 811 swizzle(tmp, ONE, Z, Y, X), 812 i915_emit_const4fv(p, cos_constants), 0); 813 } 814 break; 815 816 case OPCODE_SEQ: 817 tmp = i915_get_utemp(p); 818 flags = get_result_flags(inst); 819 dst = get_result_vector(p, inst); 820 821 /* tmp = src1 >= src2 */ 822 i915_emit_arith(p, 823 A0_SGE, 824 tmp, 825 flags, 0, 826 src_vector(p, &inst->SrcReg[0], program), 827 src_vector(p, &inst->SrcReg[1], program), 828 0); 829 /* dst = src1 <= src2 */ 830 i915_emit_arith(p, 831 A0_SGE, 832 dst, 833 flags, 0, 834 negate(src_vector(p, &inst->SrcReg[0], program), 835 1, 1, 1, 1), 836 negate(src_vector(p, &inst->SrcReg[1], program), 837 1, 1, 1, 1), 838 0); 839 /* dst = tmp && dst */ 840 i915_emit_arith(p, 841 A0_MUL, 842 dst, 843 flags, 0, 844 dst, 845 tmp, 846 0); 847 break; 848 849 case OPCODE_SIN: 850 src0 = src_vector(p, &inst->SrcReg[0], program); 851 tmp = i915_get_utemp(p); 852 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]); 853 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]); 854 855 /* Reduce range from repeating about [-pi,pi] to [-1,1] */ 856 i915_emit_arith(p, 857 A0_MAD, 858 tmp, A0_DEST_CHANNEL_X, 0, 859 src0, 860 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */ 861 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */ 862 863 i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0); 864 865 i915_emit_arith(p, 866 A0_MAD, 867 tmp, A0_DEST_CHANNEL_X, 0, 868 tmp, 869 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */ 870 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */ 871 872 /* Compute sin using a quadratic and quartic. It gives continuity 873 * that repeating the Taylor series lacks every 2*pi, and has 874 * reduced error. 875 * 876 * The idea was described at: 877 * http://www.devmaster.net/forums/showthread.php?t=5784 878 */ 879 880 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */ 881 i915_emit_arith(p, 882 A0_MAX, 883 tmp, A0_DEST_CHANNEL_Y, 0, 884 swizzle(tmp, ZERO, X, ZERO, ZERO), 885 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 886 0); 887 888 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */ 889 i915_emit_arith(p, 890 A0_MUL, 891 tmp, A0_DEST_CHANNEL_Y, 0, 892 swizzle(tmp, ZERO, X, ZERO, ZERO), 893 tmp, 894 0); 895 896 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */ 897 i915_emit_arith(p, 898 A0_DP3, 899 tmp, A0_DEST_CHANNEL_X, 0, 900 tmp, 901 swizzle(consts1, X, Y, ZERO, ZERO), 902 0); 903 904 /* tmp.x now contains a first approximation (y). Now, weight it 905 * against tmp.y**2 to get closer. 906 */ 907 i915_emit_arith(p, 908 A0_MAX, 909 tmp, A0_DEST_CHANNEL_Y, 0, 910 swizzle(tmp, ZERO, X, ZERO, ZERO), 911 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0), 912 0); 913 914 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */ 915 i915_emit_arith(p, 916 A0_MAD, 917 tmp, A0_DEST_CHANNEL_Y, 0, 918 swizzle(tmp, ZERO, X, ZERO, ZERO), 919 swizzle(tmp, ZERO, Y, ZERO, ZERO), 920 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0)); 921 922 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */ 923 i915_emit_arith(p, 924 A0_MAD, 925 get_result_vector(p, inst), 926 get_result_flags(inst), 0, 927 swizzle(consts1, W, W, W, W), 928 swizzle(tmp, Y, Y, Y, Y), 929 swizzle(tmp, X, X, X, X)); 930 931 break; 932 933 case OPCODE_SGE: 934 EMIT_2ARG_ARITH(A0_SGE); 935 break; 936 937 case OPCODE_SGT: 938 i915_emit_arith(p, 939 A0_SLT, 940 get_result_vector( p, inst ), 941 get_result_flags( inst ), 0, 942 negate(src_vector( p, &inst->SrcReg[0], program), 943 1, 1, 1, 1), 944 negate(src_vector( p, &inst->SrcReg[1], program), 945 1, 1, 1, 1), 946 0); 947 break; 948 949 case OPCODE_SLE: 950 i915_emit_arith(p, 951 A0_SGE, 952 get_result_vector( p, inst ), 953 get_result_flags( inst ), 0, 954 negate(src_vector( p, &inst->SrcReg[0], program), 955 1, 1, 1, 1), 956 negate(src_vector( p, &inst->SrcReg[1], program), 957 1, 1, 1, 1), 958 0); 959 break; 960 961 case OPCODE_SLT: 962 EMIT_2ARG_ARITH(A0_SLT); 963 break; 964 965 case OPCODE_SNE: 966 tmp = i915_get_utemp(p); 967 flags = get_result_flags(inst); 968 dst = get_result_vector(p, inst); 969 970 /* tmp = src1 < src2 */ 971 i915_emit_arith(p, 972 A0_SLT, 973 tmp, 974 flags, 0, 975 src_vector(p, &inst->SrcReg[0], program), 976 src_vector(p, &inst->SrcReg[1], program), 977 0); 978 /* dst = src1 > src2 */ 979 i915_emit_arith(p, 980 A0_SLT, 981 dst, 982 flags, 0, 983 negate(src_vector(p, &inst->SrcReg[0], program), 984 1, 1, 1, 1), 985 negate(src_vector(p, &inst->SrcReg[1], program), 986 1, 1, 1, 1), 987 0); 988 /* dst = tmp || dst */ 989 i915_emit_arith(p, 990 A0_ADD, 991 dst, 992 flags | A0_DEST_SATURATE, 0, 993 dst, 994 tmp, 995 0); 996 break; 997 998 case OPCODE_SSG: 999 dst = get_result_vector(p, inst); 1000 flags = get_result_flags(inst); 1001 src0 = src_vector(p, &inst->SrcReg[0], program); 1002 tmp = i915_get_utemp(p); 1003 1004 /* tmp = (src < 0.0) */ 1005 i915_emit_arith(p, 1006 A0_SLT, 1007 tmp, 1008 flags, 0, 1009 src0, 1010 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 1011 0); 1012 1013 /* dst = (0.0 < src) */ 1014 i915_emit_arith(p, 1015 A0_SLT, 1016 dst, 1017 flags, 0, 1018 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 1019 src0, 1020 0); 1021 1022 /* dst = (src > 0.0) - (src < 0.0) */ 1023 i915_emit_arith(p, 1024 A0_ADD, 1025 dst, 1026 flags, 0, 1027 dst, 1028 negate(tmp, 1, 1, 1, 1), 1029 0); 1030 1031 break; 1032 1033 case OPCODE_SUB: 1034 src0 = src_vector(p, &inst->SrcReg[0], program); 1035 src1 = src_vector(p, &inst->SrcReg[1], program); 1036 1037 i915_emit_arith(p, 1038 A0_ADD, 1039 get_result_vector(p, inst), 1040 get_result_flags(inst), 0, 1041 src0, negate(src1, 1, 1, 1, 1), 0); 1042 break; 1043 1044 case OPCODE_SWZ: 1045 EMIT_1ARG_ARITH(A0_MOV); /* extended swizzle handled natively */ 1046 break; 1047 1048 case OPCODE_TEX: 1049 EMIT_TEX(T0_TEXLD); 1050 break; 1051 1052 case OPCODE_TXB: 1053 EMIT_TEX(T0_TEXLDB); 1054 break; 1055 1056 case OPCODE_TXP: 1057 EMIT_TEX(T0_TEXLDP); 1058 break; 1059 1060 case OPCODE_XPD: 1061 /* Cross product: 1062 * result.x = src0.y * src1.z - src0.z * src1.y; 1063 * result.y = src0.z * src1.x - src0.x * src1.z; 1064 * result.z = src0.x * src1.y - src0.y * src1.x; 1065 * result.w = undef; 1066 */ 1067 src0 = src_vector(p, &inst->SrcReg[0], program); 1068 src1 = src_vector(p, &inst->SrcReg[1], program); 1069 tmp = i915_get_utemp(p); 1070 1071 i915_emit_arith(p, 1072 A0_MUL, 1073 tmp, A0_DEST_CHANNEL_ALL, 0, 1074 swizzle(src0, Z, X, Y, ONE), 1075 swizzle(src1, Y, Z, X, ONE), 0); 1076 1077 i915_emit_arith(p, 1078 A0_MAD, 1079 get_result_vector(p, inst), 1080 get_result_flags(inst), 0, 1081 swizzle(src0, Y, Z, X, ONE), 1082 swizzle(src1, Z, X, Y, ONE), 1083 negate(tmp, 1, 1, 1, 0)); 1084 break; 1085 1086 case OPCODE_END: 1087 return; 1088 1089 case OPCODE_BGNLOOP: 1090 case OPCODE_BGNSUB: 1091 case OPCODE_BRA: 1092 case OPCODE_BRK: 1093 case OPCODE_CAL: 1094 case OPCODE_CONT: 1095 case OPCODE_DDX: 1096 case OPCODE_DDY: 1097 case OPCODE_ELSE: 1098 case OPCODE_ENDIF: 1099 case OPCODE_ENDLOOP: 1100 case OPCODE_ENDSUB: 1101 case OPCODE_IF: 1102 case OPCODE_RET: 1103 p->error = 1; 1104 i915_program_error(p, "Unsupported opcode: %s", 1105 _mesa_opcode_string(inst->Opcode)); 1106 return; 1107 1108 case OPCODE_EXP: 1109 case OPCODE_LOG: 1110 /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in 1111 * prog_instruction.h, but apparently GLSL doesn't ever emit them. 1112 * Instead, it translates to EX2 or LG2. 1113 */ 1114 case OPCODE_TXD: 1115 case OPCODE_TXL: 1116 /* These opcodes are claimed by GLSL in prog_instruction.h, but 1117 * only NV_vp/fp appears to emit them. 1118 */ 1119 default: 1120 i915_program_error(p, "bad opcode: %s", 1121 _mesa_opcode_string(inst->Opcode)); 1122 return; 1123 } 1124 1125 inst++; 1126 i915_release_utemps(p); 1127 } 1128 } 1129 1130 /* Rather than trying to intercept and jiggle depth writes during 1131 * emit, just move the value into its correct position at the end of 1132 * the program: 1133 */ 1134 static void 1135 fixup_depth_write(struct i915_fragment_program *p) 1136 { 1137 if (p->depth_written) { 1138 GLuint depth = UREG(REG_TYPE_OD, 0); 1139 1140 i915_emit_arith(p, 1141 A0_MOV, 1142 depth, A0_DEST_CHANNEL_W, 0, 1143 swizzle(depth, X, Y, Z, Z), 0, 0); 1144 } 1145 } 1146 1147 1148 static void 1149 check_wpos(struct i915_fragment_program *p) 1150 { 1151 GLbitfield64 inputs = p->FragProg.Base.InputsRead; 1152 GLint i; 1153 1154 p->wpos_tex = -1; 1155 1156 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 1157 if (inputs & (FRAG_BIT_TEX(i) | FRAG_BIT_VAR(i))) 1158 continue; 1159 else if (inputs & FRAG_BIT_WPOS) { 1160 p->wpos_tex = i; 1161 inputs &= ~FRAG_BIT_WPOS; 1162 } 1163 } 1164 1165 if (inputs & FRAG_BIT_WPOS) { 1166 i915_program_error(p, "No free texcoord for wpos value"); 1167 } 1168 } 1169 1170 1171 static void 1172 translate_program(struct i915_fragment_program *p) 1173 { 1174 struct i915_context *i915 = I915_CONTEXT(p->ctx); 1175 1176 if (INTEL_DEBUG & DEBUG_WM) { 1177 printf("fp:\n"); 1178 _mesa_print_program(&p->FragProg.Base); 1179 printf("\n"); 1180 } 1181 1182 i915_init_program(i915, p); 1183 check_wpos(p); 1184 upload_program(p); 1185 fixup_depth_write(p); 1186 i915_fini_program(p); 1187 1188 p->translated = 1; 1189 } 1190 1191 1192 static void 1193 track_params(struct i915_fragment_program *p) 1194 { 1195 GLint i; 1196 1197 if (p->nr_params) 1198 _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters); 1199 1200 for (i = 0; i < p->nr_params; i++) { 1201 GLint reg = p->param[i].reg; 1202 COPY_4V(p->constant[reg], p->param[i].values); 1203 } 1204 1205 p->params_uptodate = 1; 1206 p->on_hardware = 0; /* overkill */ 1207 } 1208 1209 1210 static void 1211 i915BindProgram(struct gl_context * ctx, GLenum target, struct gl_program *prog) 1212 { 1213 if (target == GL_FRAGMENT_PROGRAM_ARB) { 1214 struct i915_context *i915 = I915_CONTEXT(ctx); 1215 struct i915_fragment_program *p = (struct i915_fragment_program *) prog; 1216 1217 if (i915->current_program == p) 1218 return; 1219 1220 if (i915->current_program) { 1221 i915->current_program->on_hardware = 0; 1222 i915->current_program->params_uptodate = 0; 1223 } 1224 1225 i915->current_program = p; 1226 1227 assert(p->on_hardware == 0); 1228 assert(p->params_uptodate == 0); 1229 1230 } 1231 } 1232 1233 static struct gl_program * 1234 i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id) 1235 { 1236 switch (target) { 1237 case GL_VERTEX_PROGRAM_ARB: 1238 return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program), 1239 target, id); 1240 1241 case GL_FRAGMENT_PROGRAM_ARB:{ 1242 struct i915_fragment_program *prog = 1243 CALLOC_STRUCT(i915_fragment_program); 1244 if (prog) { 1245 i915_init_program(I915_CONTEXT(ctx), prog); 1246 1247 return _mesa_init_fragment_program(ctx, &prog->FragProg, 1248 target, id); 1249 } 1250 else 1251 return NULL; 1252 } 1253 1254 default: 1255 /* Just fallback: 1256 */ 1257 return _mesa_new_program(ctx, target, id); 1258 } 1259 } 1260 1261 static void 1262 i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog) 1263 { 1264 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1265 struct i915_context *i915 = I915_CONTEXT(ctx); 1266 struct i915_fragment_program *p = (struct i915_fragment_program *) prog; 1267 1268 if (i915->current_program == p) 1269 i915->current_program = 0; 1270 } 1271 1272 _mesa_delete_program(ctx, prog); 1273 } 1274 1275 1276 static GLboolean 1277 i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog) 1278 { 1279 if (target == GL_FRAGMENT_PROGRAM_ARB) { 1280 struct i915_fragment_program *p = (struct i915_fragment_program *) prog; 1281 1282 if (!p->translated) 1283 translate_program(p); 1284 1285 return !p->error; 1286 } 1287 else 1288 return true; 1289 } 1290 1291 static GLboolean 1292 i915ProgramStringNotify(struct gl_context * ctx, 1293 GLenum target, struct gl_program *prog) 1294 { 1295 if (target == GL_FRAGMENT_PROGRAM_ARB) { 1296 struct i915_fragment_program *p = (struct i915_fragment_program *) prog; 1297 p->translated = 0; 1298 } 1299 1300 (void) _tnl_program_string(ctx, target, prog); 1301 1302 /* XXX check if program is legal, within limits */ 1303 return true; 1304 } 1305 1306 static void 1307 i915SamplerUniformChange(struct gl_context *ctx, 1308 GLenum target, struct gl_program *prog) 1309 { 1310 i915ProgramStringNotify(ctx, target, prog); 1311 } 1312 1313 void 1314 i915_update_program(struct gl_context *ctx) 1315 { 1316 struct intel_context *intel = intel_context(ctx); 1317 struct i915_context *i915 = i915_context(&intel->ctx); 1318 struct i915_fragment_program *fp = 1319 (struct i915_fragment_program *) ctx->FragmentProgram._Current; 1320 1321 if (i915->current_program != fp) { 1322 if (i915->current_program) { 1323 i915->current_program->on_hardware = 0; 1324 i915->current_program->params_uptodate = 0; 1325 } 1326 1327 i915->current_program = fp; 1328 } 1329 1330 if (!fp->translated) 1331 translate_program(fp); 1332 1333 FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error); 1334 } 1335 1336 void 1337 i915ValidateFragmentProgram(struct i915_context *i915) 1338 { 1339 struct gl_context *ctx = &i915->intel.ctx; 1340 struct intel_context *intel = intel_context(ctx); 1341 TNLcontext *tnl = TNL_CONTEXT(ctx); 1342 struct vertex_buffer *VB = &tnl->vb; 1343 1344 struct i915_fragment_program *p = 1345 (struct i915_fragment_program *) ctx->FragmentProgram._Current; 1346 1347 const GLbitfield64 inputsRead = p->FragProg.Base.InputsRead; 1348 GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK; 1349 GLuint s2 = S2_TEXCOORD_NONE; 1350 int i, offset = 0; 1351 1352 /* Important: 1353 */ 1354 VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr; 1355 1356 if (!p->translated) 1357 translate_program(p); 1358 1359 intel->vertex_attr_count = 0; 1360 intel->wpos_offset = 0; 1361 intel->coloroffset = 0; 1362 intel->specoffset = 0; 1363 1364 if (inputsRead & FRAG_BITS_TEX_ANY || p->wpos_tex != -1) { 1365 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16); 1366 } 1367 else { 1368 EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12); 1369 } 1370 1371 /* Handle gl_PointSize builtin var here */ 1372 if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled) 1373 EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4); 1374 1375 if (inputsRead & FRAG_BIT_COL0) { 1376 intel->coloroffset = offset / 4; 1377 EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4); 1378 } 1379 1380 if (inputsRead & FRAG_BIT_COL1) { 1381 intel->specoffset = offset / 4; 1382 EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4); 1383 } 1384 1385 if ((inputsRead & FRAG_BIT_FOGC)) { 1386 EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4); 1387 } 1388 1389 for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { 1390 if (inputsRead & FRAG_BIT_TEX(i)) { 1391 int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size; 1392 1393 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); 1394 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); 1395 1396 EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4); 1397 } 1398 else if (inputsRead & FRAG_BIT_VAR(i)) { 1399 int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size; 1400 1401 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); 1402 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); 1403 1404 EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4); 1405 } 1406 else if (i == p->wpos_tex) { 1407 int wpos_size = 4 * sizeof(float); 1408 /* If WPOS is required, duplicate the XYZ position data in an 1409 * unused texture coordinate: 1410 */ 1411 s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); 1412 s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size)); 1413 1414 intel->wpos_offset = offset; 1415 EMIT_PAD(wpos_size); 1416 } 1417 } 1418 1419 if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] || 1420 s4 != i915->state.Ctx[I915_CTXREG_LIS4]) { 1421 int k; 1422 1423 I915_STATECHANGE(i915, I915_UPLOAD_CTX); 1424 1425 /* Must do this *after* statechange, so as not to affect 1426 * buffered vertices reliant on the old state: 1427 */ 1428 intel->vertex_size = _tnl_install_attrs(&intel->ctx, 1429 intel->vertex_attrs, 1430 intel->vertex_attr_count, 1431 intel->ViewportMatrix.m, 0); 1432 1433 assert(intel->prim.current_offset == intel->prim.start_offset); 1434 intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size; 1435 intel->prim.current_offset = intel->prim.start_offset; 1436 1437 intel->vertex_size >>= 2; 1438 1439 i915->state.Ctx[I915_CTXREG_LIS2] = s2; 1440 i915->state.Ctx[I915_CTXREG_LIS4] = s4; 1441 1442 k = intel->vtbl.check_vertex_size(intel, intel->vertex_size); 1443 assert(k); 1444 } 1445 1446 if (!p->params_uptodate) 1447 track_params(p); 1448 1449 if (!p->on_hardware) 1450 i915_upload_program(i915, p); 1451 1452 if (INTEL_DEBUG & DEBUG_WM) { 1453 printf("i915:\n"); 1454 i915_disassemble_program(i915->state.Program, i915->state.ProgramSize); 1455 } 1456 } 1457 1458 void 1459 i915InitFragProgFuncs(struct dd_function_table *functions) 1460 { 1461 functions->BindProgram = i915BindProgram; 1462 functions->NewProgram = i915NewProgram; 1463 functions->DeleteProgram = i915DeleteProgram; 1464 functions->IsProgramNative = i915IsProgramNative; 1465 functions->ProgramStringNotify = i915ProgramStringNotify; 1466 functions->SamplerUniformChange = i915SamplerUniformChange; 1467 } 1468