1 /************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * \file ffvertex_prog.c 30 * 31 * Create a vertex program to execute the current fixed function T&L pipeline. 32 * \author Keith Whitwell 33 */ 34 35 36 #include "main/glheader.h" 37 #include "main/mtypes.h" 38 #include "main/macros.h" 39 #include "main/mfeatures.h" 40 #include "main/enums.h" 41 #include "main/ffvertex_prog.h" 42 #include "program/program.h" 43 #include "program/prog_cache.h" 44 #include "program/prog_instruction.h" 45 #include "program/prog_parameter.h" 46 #include "program/prog_print.h" 47 #include "program/prog_statevars.h" 48 49 50 /** Max of number of lights and texture coord units */ 51 #define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) 52 53 struct state_key { 54 unsigned light_color_material_mask:12; 55 unsigned light_global_enabled:1; 56 unsigned light_local_viewer:1; 57 unsigned light_twoside:1; 58 unsigned material_shininess_is_zero:1; 59 unsigned need_eye_coords:1; 60 unsigned normalize:1; 61 unsigned rescale_normals:1; 62 63 unsigned fog_source_is_depth:1; 64 unsigned fog_distance_mode:2; 65 unsigned separate_specular:1; 66 unsigned point_attenuated:1; 67 unsigned point_array:1; 68 unsigned texture_enabled_global:1; 69 unsigned fragprog_inputs_read:12; 70 71 GLbitfield64 varying_vp_inputs; 72 73 struct { 74 unsigned light_enabled:1; 75 unsigned light_eyepos3_is_zero:1; 76 unsigned light_spotcutoff_is_180:1; 77 unsigned light_attenuated:1; 78 unsigned texunit_really_enabled:1; 79 unsigned texmat_enabled:1; 80 unsigned coord_replace:1; 81 unsigned texgen_enabled:4; 82 unsigned texgen_mode0:4; 83 unsigned texgen_mode1:4; 84 unsigned texgen_mode2:4; 85 unsigned texgen_mode3:4; 86 } unit[NUM_UNITS]; 87 }; 88 89 90 #define TXG_NONE 0 91 #define TXG_OBJ_LINEAR 1 92 #define TXG_EYE_LINEAR 2 93 #define TXG_SPHERE_MAP 3 94 #define TXG_REFLECTION_MAP 4 95 #define TXG_NORMAL_MAP 5 96 97 static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 98 { 99 if (!enabled) 100 return TXG_NONE; 101 102 switch (mode) { 103 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 104 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 105 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 106 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 107 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 108 default: return TXG_NONE; 109 } 110 } 111 112 #define FDM_EYE_RADIAL 0 113 #define FDM_EYE_PLANE 1 114 #define FDM_EYE_PLANE_ABS 2 115 116 static GLuint translate_fog_distance_mode( GLenum mode ) 117 { 118 switch (mode) { 119 case GL_EYE_RADIAL_NV: 120 return FDM_EYE_RADIAL; 121 case GL_EYE_PLANE: 122 return FDM_EYE_PLANE; 123 default: /* shouldn't happen; fall through to a sensible default */ 124 case GL_EYE_PLANE_ABSOLUTE_NV: 125 return FDM_EYE_PLANE_ABS; 126 } 127 } 128 129 static GLboolean check_active_shininess( struct gl_context *ctx, 130 const struct state_key *key, 131 GLuint side ) 132 { 133 GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side; 134 135 if ((key->varying_vp_inputs & VERT_BIT_COLOR0) && 136 (key->light_color_material_mask & (1 << attr))) 137 return GL_TRUE; 138 139 if (key->varying_vp_inputs & VERT_ATTRIB_GENERIC(attr)) 140 return GL_TRUE; 141 142 if (ctx->Light.Material.Attrib[attr][0] != 0.0F) 143 return GL_TRUE; 144 145 return GL_FALSE; 146 } 147 148 149 static void make_state_key( struct gl_context *ctx, struct state_key *key ) 150 { 151 const struct gl_fragment_program *fp; 152 GLuint i; 153 154 memset(key, 0, sizeof(struct state_key)); 155 fp = ctx->FragmentProgram._Current; 156 157 /* This now relies on texenvprogram.c being active: 158 */ 159 assert(fp); 160 161 key->need_eye_coords = ctx->_NeedEyeCoords; 162 163 key->fragprog_inputs_read = fp->Base.InputsRead; 164 key->varying_vp_inputs = ctx->varying_vp_inputs; 165 166 if (ctx->RenderMode == GL_FEEDBACK) { 167 /* make sure the vertprog emits color and tex0 */ 168 key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0); 169 } 170 171 key->separate_specular = (ctx->Light.Model.ColorControl == 172 GL_SEPARATE_SPECULAR_COLOR); 173 174 if (ctx->Light.Enabled) { 175 key->light_global_enabled = 1; 176 177 if (ctx->Light.Model.LocalViewer) 178 key->light_local_viewer = 1; 179 180 if (ctx->Light.Model.TwoSide) 181 key->light_twoside = 1; 182 183 if (ctx->Light.ColorMaterialEnabled) { 184 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask; 185 } 186 187 for (i = 0; i < MAX_LIGHTS; i++) { 188 struct gl_light *light = &ctx->Light.Light[i]; 189 190 if (light->Enabled) { 191 key->unit[i].light_enabled = 1; 192 193 if (light->EyePosition[3] == 0.0) 194 key->unit[i].light_eyepos3_is_zero = 1; 195 196 if (light->SpotCutoff == 180.0) 197 key->unit[i].light_spotcutoff_is_180 = 1; 198 199 if (light->ConstantAttenuation != 1.0 || 200 light->LinearAttenuation != 0.0 || 201 light->QuadraticAttenuation != 0.0) 202 key->unit[i].light_attenuated = 1; 203 } 204 } 205 206 if (check_active_shininess(ctx, key, 0)) { 207 key->material_shininess_is_zero = 0; 208 } 209 else if (key->light_twoside && 210 check_active_shininess(ctx, key, 1)) { 211 key->material_shininess_is_zero = 0; 212 } 213 else { 214 key->material_shininess_is_zero = 1; 215 } 216 } 217 218 if (ctx->Transform.Normalize) 219 key->normalize = 1; 220 221 if (ctx->Transform.RescaleNormals) 222 key->rescale_normals = 1; 223 224 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) { 225 key->fog_source_is_depth = 1; 226 key->fog_distance_mode = translate_fog_distance_mode(ctx->Fog.FogDistanceMode); 227 } 228 229 if (ctx->Point._Attenuated) 230 key->point_attenuated = 1; 231 232 #if FEATURE_point_size_array 233 if (ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled) 234 key->point_array = 1; 235 #endif 236 237 if (ctx->Texture._TexGenEnabled || 238 ctx->Texture._TexMatEnabled || 239 ctx->Texture._EnabledUnits) 240 key->texture_enabled_global = 1; 241 242 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 243 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 244 245 if (texUnit->_ReallyEnabled) 246 key->unit[i].texunit_really_enabled = 1; 247 248 if (ctx->Point.PointSprite) 249 if (ctx->Point.CoordReplace[i]) 250 key->unit[i].coord_replace = 1; 251 252 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 253 key->unit[i].texmat_enabled = 1; 254 255 if (texUnit->TexGenEnabled) { 256 key->unit[i].texgen_enabled = 1; 257 258 key->unit[i].texgen_mode0 = 259 translate_texgen( texUnit->TexGenEnabled & (1<<0), 260 texUnit->GenS.Mode ); 261 key->unit[i].texgen_mode1 = 262 translate_texgen( texUnit->TexGenEnabled & (1<<1), 263 texUnit->GenT.Mode ); 264 key->unit[i].texgen_mode2 = 265 translate_texgen( texUnit->TexGenEnabled & (1<<2), 266 texUnit->GenR.Mode ); 267 key->unit[i].texgen_mode3 = 268 translate_texgen( texUnit->TexGenEnabled & (1<<3), 269 texUnit->GenQ.Mode ); 270 } 271 } 272 } 273 274 275 276 /* Very useful debugging tool - produces annotated listing of 277 * generated program with line/function references for each 278 * instruction back into this file: 279 */ 280 #define DISASSEM 0 281 282 283 /* Use uregs to represent registers internally, translate to Mesa's 284 * expected formats on emit. 285 * 286 * NOTE: These are passed by value extensively in this file rather 287 * than as usual by pointer reference. If this disturbs you, try 288 * remembering they are just 32bits in size. 289 * 290 * GCC is smart enough to deal with these dword-sized structures in 291 * much the same way as if I had defined them as dwords and was using 292 * macros to access and set the fields. This is much nicer and easier 293 * to evolve. 294 */ 295 struct ureg { 296 GLuint file:4; 297 GLint idx:9; /* relative addressing may be negative */ 298 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 299 GLuint negate:1; 300 GLuint swz:12; 301 GLuint pad:6; 302 }; 303 304 305 struct tnl_program { 306 const struct state_key *state; 307 struct gl_vertex_program *program; 308 GLint max_inst; /** number of instructions allocated for program */ 309 GLboolean mvp_with_dp4; 310 311 GLuint temp_in_use; 312 GLuint temp_reserved; 313 314 struct ureg eye_position; 315 struct ureg eye_position_z; 316 struct ureg eye_position_normalized; 317 struct ureg transformed_normal; 318 struct ureg identity; 319 320 GLuint materials; 321 GLuint color_materials; 322 }; 323 324 325 static const struct ureg undef = { 326 PROGRAM_UNDEFINED, 327 0, 328 0, 329 0, 330 0 331 }; 332 333 /* Local shorthand: 334 */ 335 #define X SWIZZLE_X 336 #define Y SWIZZLE_Y 337 #define Z SWIZZLE_Z 338 #define W SWIZZLE_W 339 340 341 /* Construct a ureg: 342 */ 343 static struct ureg make_ureg(GLuint file, GLint idx) 344 { 345 struct ureg reg; 346 reg.file = file; 347 reg.idx = idx; 348 reg.negate = 0; 349 reg.swz = SWIZZLE_NOOP; 350 reg.pad = 0; 351 return reg; 352 } 353 354 355 356 static struct ureg negate( struct ureg reg ) 357 { 358 reg.negate ^= 1; 359 return reg; 360 } 361 362 363 static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 364 { 365 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 366 GET_SWZ(reg.swz, y), 367 GET_SWZ(reg.swz, z), 368 GET_SWZ(reg.swz, w)); 369 return reg; 370 } 371 372 373 static struct ureg swizzle1( struct ureg reg, int x ) 374 { 375 return swizzle(reg, x, x, x, x); 376 } 377 378 379 static struct ureg get_temp( struct tnl_program *p ) 380 { 381 int bit = ffs( ~p->temp_in_use ); 382 if (!bit) { 383 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 384 exit(1); 385 } 386 387 if ((GLuint) bit > p->program->Base.NumTemporaries) 388 p->program->Base.NumTemporaries = bit; 389 390 p->temp_in_use |= 1<<(bit-1); 391 return make_ureg(PROGRAM_TEMPORARY, bit-1); 392 } 393 394 395 static struct ureg reserve_temp( struct tnl_program *p ) 396 { 397 struct ureg temp = get_temp( p ); 398 p->temp_reserved |= 1<<temp.idx; 399 return temp; 400 } 401 402 403 static void release_temp( struct tnl_program *p, struct ureg reg ) 404 { 405 if (reg.file == PROGRAM_TEMPORARY) { 406 p->temp_in_use &= ~(1<<reg.idx); 407 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 408 } 409 } 410 411 static void release_temps( struct tnl_program *p ) 412 { 413 p->temp_in_use = p->temp_reserved; 414 } 415 416 417 static struct ureg register_param5(struct tnl_program *p, 418 GLint s0, 419 GLint s1, 420 GLint s2, 421 GLint s3, 422 GLint s4) 423 { 424 gl_state_index tokens[STATE_LENGTH]; 425 GLint idx; 426 tokens[0] = s0; 427 tokens[1] = s1; 428 tokens[2] = s2; 429 tokens[3] = s3; 430 tokens[4] = s4; 431 idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); 432 return make_ureg(PROGRAM_STATE_VAR, idx); 433 } 434 435 436 #define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 437 #define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 438 #define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 439 #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 440 441 442 443 /** 444 * \param input one of VERT_ATTRIB_x tokens. 445 */ 446 static struct ureg register_input( struct tnl_program *p, GLuint input ) 447 { 448 assert(input < VERT_ATTRIB_MAX); 449 450 if (p->state->varying_vp_inputs & VERT_BIT(input)) { 451 p->program->Base.InputsRead |= VERT_BIT(input); 452 return make_ureg(PROGRAM_INPUT, input); 453 } 454 else { 455 return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input ); 456 } 457 } 458 459 460 /** 461 * \param input one of VERT_RESULT_x tokens. 462 */ 463 static struct ureg register_output( struct tnl_program *p, GLuint output ) 464 { 465 p->program->Base.OutputsWritten |= BITFIELD64_BIT(output); 466 return make_ureg(PROGRAM_OUTPUT, output); 467 } 468 469 470 static struct ureg register_const4f( struct tnl_program *p, 471 GLfloat s0, 472 GLfloat s1, 473 GLfloat s2, 474 GLfloat s3) 475 { 476 gl_constant_value values[4]; 477 GLint idx; 478 GLuint swizzle; 479 values[0].f = s0; 480 values[1].f = s1; 481 values[2].f = s2; 482 values[3].f = s3; 483 idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, 484 &swizzle ); 485 ASSERT(swizzle == SWIZZLE_NOOP); 486 return make_ureg(PROGRAM_CONSTANT, idx); 487 } 488 489 #define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 490 #define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 491 #define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 492 #define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 493 494 static GLboolean is_undef( struct ureg reg ) 495 { 496 return reg.file == PROGRAM_UNDEFINED; 497 } 498 499 500 static struct ureg get_identity_param( struct tnl_program *p ) 501 { 502 if (is_undef(p->identity)) 503 p->identity = register_const4f(p, 0,0,0,1); 504 505 return p->identity; 506 } 507 508 static void register_matrix_param5( struct tnl_program *p, 509 GLint s0, /* modelview, projection, etc */ 510 GLint s1, /* texture matrix number */ 511 GLint s2, /* first row */ 512 GLint s3, /* last row */ 513 GLint s4, /* inverse, transpose, etc */ 514 struct ureg *matrix ) 515 { 516 GLint i; 517 518 /* This is a bit sad as the support is there to pull the whole 519 * matrix out in one go: 520 */ 521 for (i = 0; i <= s3 - s2; i++) 522 matrix[i] = register_param5( p, s0, s1, i, i, s4 ); 523 } 524 525 526 static void emit_arg( struct prog_src_register *src, 527 struct ureg reg ) 528 { 529 src->File = reg.file; 530 src->Index = reg.idx; 531 src->Swizzle = reg.swz; 532 src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; 533 src->Abs = 0; 534 src->RelAddr = 0; 535 /* Check that bitfield sizes aren't exceeded */ 536 ASSERT(src->Index == reg.idx); 537 } 538 539 540 static void emit_dst( struct prog_dst_register *dst, 541 struct ureg reg, GLuint mask ) 542 { 543 dst->File = reg.file; 544 dst->Index = reg.idx; 545 /* allow zero as a shorthand for xyzw */ 546 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 547 dst->CondMask = COND_TR; /* always pass cond test */ 548 dst->CondSwizzle = SWIZZLE_NOOP; 549 dst->CondSrc = 0; 550 /* Check that bitfield sizes aren't exceeded */ 551 ASSERT(dst->Index == reg.idx); 552 } 553 554 555 static void debug_insn( struct prog_instruction *inst, const char *fn, 556 GLuint line ) 557 { 558 if (DISASSEM) { 559 static const char *last_fn; 560 561 if (fn != last_fn) { 562 last_fn = fn; 563 printf("%s:\n", fn); 564 } 565 566 printf("%d:\t", line); 567 _mesa_print_instruction(inst); 568 } 569 } 570 571 572 static void emit_op3fn(struct tnl_program *p, 573 enum prog_opcode op, 574 struct ureg dest, 575 GLuint mask, 576 struct ureg src0, 577 struct ureg src1, 578 struct ureg src2, 579 const char *fn, 580 GLuint line) 581 { 582 GLuint nr; 583 struct prog_instruction *inst; 584 585 assert((GLint) p->program->Base.NumInstructions <= p->max_inst); 586 587 if (p->program->Base.NumInstructions == p->max_inst) { 588 /* need to extend the program's instruction array */ 589 struct prog_instruction *newInst; 590 591 /* double the size */ 592 p->max_inst *= 2; 593 594 newInst = _mesa_alloc_instructions(p->max_inst); 595 if (!newInst) { 596 _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); 597 return; 598 } 599 600 _mesa_copy_instructions(newInst, 601 p->program->Base.Instructions, 602 p->program->Base.NumInstructions); 603 604 _mesa_free_instructions(p->program->Base.Instructions, 605 p->program->Base.NumInstructions); 606 607 p->program->Base.Instructions = newInst; 608 } 609 610 nr = p->program->Base.NumInstructions++; 611 612 inst = &p->program->Base.Instructions[nr]; 613 inst->Opcode = (enum prog_opcode) op; 614 inst->Data = 0; 615 616 emit_arg( &inst->SrcReg[0], src0 ); 617 emit_arg( &inst->SrcReg[1], src1 ); 618 emit_arg( &inst->SrcReg[2], src2 ); 619 620 emit_dst( &inst->DstReg, dest, mask ); 621 622 debug_insn(inst, fn, line); 623 } 624 625 626 #define emit_op3(p, op, dst, mask, src0, src1, src2) \ 627 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 628 629 #define emit_op2(p, op, dst, mask, src0, src1) \ 630 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 631 632 #define emit_op1(p, op, dst, mask, src0) \ 633 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 634 635 636 static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 637 { 638 if (reg.file == PROGRAM_TEMPORARY && 639 !(p->temp_reserved & (1<<reg.idx))) 640 return reg; 641 else { 642 struct ureg temp = get_temp(p); 643 emit_op1(p, OPCODE_MOV, temp, 0, reg); 644 return temp; 645 } 646 } 647 648 649 /* Currently no tracking performed of input/output/register size or 650 * active elements. Could be used to reduce these operations, as 651 * could the matrix type. 652 */ 653 static void emit_matrix_transform_vec4( struct tnl_program *p, 654 struct ureg dest, 655 const struct ureg *mat, 656 struct ureg src) 657 { 658 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 659 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 660 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 661 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 662 } 663 664 665 /* This version is much easier to implement if writemasks are not 666 * supported natively on the target or (like SSE), the target doesn't 667 * have a clean/obvious dotproduct implementation. 668 */ 669 static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 670 struct ureg dest, 671 const struct ureg *mat, 672 struct ureg src) 673 { 674 struct ureg tmp; 675 676 if (dest.file != PROGRAM_TEMPORARY) 677 tmp = get_temp(p); 678 else 679 tmp = dest; 680 681 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 682 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 683 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 684 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 685 686 if (dest.file != PROGRAM_TEMPORARY) 687 release_temp(p, tmp); 688 } 689 690 691 static void emit_matrix_transform_vec3( struct tnl_program *p, 692 struct ureg dest, 693 const struct ureg *mat, 694 struct ureg src) 695 { 696 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 697 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 698 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 699 } 700 701 702 static void emit_normalize_vec3( struct tnl_program *p, 703 struct ureg dest, 704 struct ureg src ) 705 { 706 #if 0 707 /* XXX use this when drivers are ready for NRM3 */ 708 emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src); 709 #else 710 struct ureg tmp = get_temp(p); 711 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 712 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 713 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); 714 release_temp(p, tmp); 715 #endif 716 } 717 718 719 static void emit_passthrough( struct tnl_program *p, 720 GLuint input, 721 GLuint output ) 722 { 723 struct ureg out = register_output(p, output); 724 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 725 } 726 727 728 static struct ureg get_eye_position( struct tnl_program *p ) 729 { 730 if (is_undef(p->eye_position)) { 731 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 732 struct ureg modelview[4]; 733 734 p->eye_position = reserve_temp(p); 735 736 if (p->mvp_with_dp4) { 737 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 738 0, modelview ); 739 740 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 741 } 742 else { 743 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 744 STATE_MATRIX_TRANSPOSE, modelview ); 745 746 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 747 } 748 } 749 750 return p->eye_position; 751 } 752 753 754 static struct ureg get_eye_position_z( struct tnl_program *p ) 755 { 756 if (!is_undef(p->eye_position)) 757 return swizzle1(p->eye_position, Z); 758 759 if (is_undef(p->eye_position_z)) { 760 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 761 struct ureg modelview[4]; 762 763 p->eye_position_z = reserve_temp(p); 764 765 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 766 0, modelview ); 767 768 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 769 } 770 771 return p->eye_position_z; 772 } 773 774 775 static struct ureg get_eye_position_normalized( struct tnl_program *p ) 776 { 777 if (is_undef(p->eye_position_normalized)) { 778 struct ureg eye = get_eye_position(p); 779 p->eye_position_normalized = reserve_temp(p); 780 emit_normalize_vec3(p, p->eye_position_normalized, eye); 781 } 782 783 return p->eye_position_normalized; 784 } 785 786 787 static struct ureg get_transformed_normal( struct tnl_program *p ) 788 { 789 if (is_undef(p->transformed_normal) && 790 !p->state->need_eye_coords && 791 !p->state->normalize && 792 !(p->state->need_eye_coords == p->state->rescale_normals)) 793 { 794 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 795 } 796 else if (is_undef(p->transformed_normal)) 797 { 798 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 799 struct ureg mvinv[3]; 800 struct ureg transformed_normal = reserve_temp(p); 801 802 if (p->state->need_eye_coords) { 803 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, 804 STATE_MATRIX_INVTRANS, mvinv ); 805 806 /* Transform to eye space: 807 */ 808 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 809 normal = transformed_normal; 810 } 811 812 /* Normalize/Rescale: 813 */ 814 if (p->state->normalize) { 815 emit_normalize_vec3( p, transformed_normal, normal ); 816 normal = transformed_normal; 817 } 818 else if (p->state->need_eye_coords == p->state->rescale_normals) { 819 /* This is already adjusted for eye/non-eye rendering: 820 */ 821 struct ureg rescale = register_param2(p, STATE_INTERNAL, 822 STATE_NORMAL_SCALE); 823 824 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 825 normal = transformed_normal; 826 } 827 828 assert(normal.file == PROGRAM_TEMPORARY); 829 p->transformed_normal = normal; 830 } 831 832 return p->transformed_normal; 833 } 834 835 836 static void build_hpos( struct tnl_program *p ) 837 { 838 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 839 struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); 840 struct ureg mvp[4]; 841 842 if (p->mvp_with_dp4) { 843 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 844 0, mvp ); 845 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 846 } 847 else { 848 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 849 STATE_MATRIX_TRANSPOSE, mvp ); 850 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 851 } 852 } 853 854 855 static GLuint material_attrib( GLuint side, GLuint property ) 856 { 857 return (property - STATE_AMBIENT) * 2 + side; 858 } 859 860 861 /** 862 * Get a bitmask of which material values vary on a per-vertex basis. 863 */ 864 static void set_material_flags( struct tnl_program *p ) 865 { 866 p->color_materials = 0; 867 p->materials = 0; 868 869 if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) { 870 p->materials = 871 p->color_materials = p->state->light_color_material_mask; 872 } 873 874 p->materials |= (p->state->varying_vp_inputs >> VERT_ATTRIB_GENERIC0); 875 } 876 877 878 static struct ureg get_material( struct tnl_program *p, GLuint side, 879 GLuint property ) 880 { 881 GLuint attrib = material_attrib(side, property); 882 883 if (p->color_materials & (1<<attrib)) 884 return register_input(p, VERT_ATTRIB_COLOR0); 885 else if (p->materials & (1<<attrib)) { 886 /* Put material values in the GENERIC slots -- they are not used 887 * for anything in fixed function mode. 888 */ 889 return register_input( p, attrib + VERT_ATTRIB_GENERIC0 ); 890 } 891 else 892 return register_param3( p, STATE_MATERIAL, side, property ); 893 } 894 895 #define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 896 MAT_BIT_FRONT_AMBIENT | \ 897 MAT_BIT_FRONT_DIFFUSE) << (side)) 898 899 900 /** 901 * Either return a precalculated constant value or emit code to 902 * calculate these values dynamically in the case where material calls 903 * are present between begin/end pairs. 904 * 905 * Probably want to shift this to the program compilation phase - if 906 * we always emitted the calculation here, a smart compiler could 907 * detect that it was constant (given a certain set of inputs), and 908 * lift it out of the main loop. That way the programs created here 909 * would be independent of the vertex_buffer details. 910 */ 911 static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 912 { 913 if (p->materials & SCENE_COLOR_BITS(side)) { 914 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 915 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 916 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 917 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 918 struct ureg tmp = make_temp(p, material_diffuse); 919 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 920 material_ambient, material_emission); 921 return tmp; 922 } 923 else 924 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 925 } 926 927 928 static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 929 GLuint side, GLuint property ) 930 { 931 GLuint attrib = material_attrib(side, property); 932 if (p->materials & (1<<attrib)) { 933 struct ureg light_value = 934 register_param3(p, STATE_LIGHT, light, property); 935 struct ureg material_value = get_material(p, side, property); 936 struct ureg tmp = get_temp(p); 937 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 938 return tmp; 939 } 940 else 941 return register_param4(p, STATE_LIGHTPROD, light, side, property); 942 } 943 944 945 static struct ureg calculate_light_attenuation( struct tnl_program *p, 946 GLuint i, 947 struct ureg VPpli, 948 struct ureg dist ) 949 { 950 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 951 STATE_ATTENUATION); 952 struct ureg att = undef; 953 954 /* Calculate spot attenuation: 955 */ 956 if (!p->state->unit[i].light_spotcutoff_is_180) { 957 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 958 STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 959 struct ureg spot = get_temp(p); 960 struct ureg slt = get_temp(p); 961 962 att = get_temp(p); 963 964 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 965 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 966 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 967 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 968 969 release_temp(p, spot); 970 release_temp(p, slt); 971 } 972 973 /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62): 974 * 975 * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero) 976 */ 977 if (p->state->unit[i].light_attenuated && !is_undef(dist)) { 978 if (is_undef(att)) 979 att = get_temp(p); 980 /* 1/d,d,d,1/d */ 981 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 982 /* 1,d,d*d,1/d */ 983 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 984 /* 1/dist-atten */ 985 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 986 987 if (!p->state->unit[i].light_spotcutoff_is_180) { 988 /* dist-atten */ 989 emit_op1(p, OPCODE_RCP, dist, 0, dist); 990 /* spot-atten * dist-atten */ 991 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 992 } 993 else { 994 /* dist-atten */ 995 emit_op1(p, OPCODE_RCP, att, 0, dist); 996 } 997 } 998 999 return att; 1000 } 1001 1002 1003 /** 1004 * Compute: 1005 * lit.y = MAX(0, dots.x) 1006 * lit.z = SLT(0, dots.x) 1007 */ 1008 static void emit_degenerate_lit( struct tnl_program *p, 1009 struct ureg lit, 1010 struct ureg dots ) 1011 { 1012 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 1013 1014 /* Note that lit.x & lit.w will not be examined. Note also that 1015 * dots.xyzw == dots.xxxx. 1016 */ 1017 1018 /* MAX lit, id, dots; 1019 */ 1020 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 1021 1022 /* result[2] = (in > 0 ? 1 : 0) 1023 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 1024 */ 1025 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 1026 } 1027 1028 1029 /* Need to add some addtional parameters to allow lighting in object 1030 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1031 * space lighting. 1032 */ 1033 static void build_lighting( struct tnl_program *p ) 1034 { 1035 const GLboolean twoside = p->state->light_twoside; 1036 const GLboolean separate = p->state->separate_specular; 1037 GLuint nr_lights = 0, count = 0; 1038 struct ureg normal = get_transformed_normal(p); 1039 struct ureg lit = get_temp(p); 1040 struct ureg dots = get_temp(p); 1041 struct ureg _col0 = undef, _col1 = undef; 1042 struct ureg _bfc0 = undef, _bfc1 = undef; 1043 GLuint i; 1044 1045 /* 1046 * NOTE: 1047 * dots.x = dot(normal, VPpli) 1048 * dots.y = dot(normal, halfAngle) 1049 * dots.z = back.shininess 1050 * dots.w = front.shininess 1051 */ 1052 1053 for (i = 0; i < MAX_LIGHTS; i++) 1054 if (p->state->unit[i].light_enabled) 1055 nr_lights++; 1056 1057 set_material_flags(p); 1058 1059 { 1060 if (!p->state->material_shininess_is_zero) { 1061 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1062 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1063 release_temp(p, shininess); 1064 } 1065 1066 _col0 = make_temp(p, get_scenecolor(p, 0)); 1067 if (separate) 1068 _col1 = make_temp(p, get_identity_param(p)); 1069 else 1070 _col1 = _col0; 1071 } 1072 1073 if (twoside) { 1074 if (!p->state->material_shininess_is_zero) { 1075 /* Note that we negate the back-face specular exponent here. 1076 * The negation will be un-done later in the back-face code below. 1077 */ 1078 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1079 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1080 negate(swizzle1(shininess,X))); 1081 release_temp(p, shininess); 1082 } 1083 1084 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1085 if (separate) 1086 _bfc1 = make_temp(p, get_identity_param(p)); 1087 else 1088 _bfc1 = _bfc0; 1089 } 1090 1091 /* If no lights, still need to emit the scenecolor. 1092 */ 1093 { 1094 struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); 1095 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1096 } 1097 1098 if (separate) { 1099 struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); 1100 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1101 } 1102 1103 if (twoside) { 1104 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); 1105 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1106 } 1107 1108 if (twoside && separate) { 1109 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); 1110 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1111 } 1112 1113 if (nr_lights == 0) { 1114 release_temps(p); 1115 return; 1116 } 1117 1118 for (i = 0; i < MAX_LIGHTS; i++) { 1119 if (p->state->unit[i].light_enabled) { 1120 struct ureg half = undef; 1121 struct ureg att = undef, VPpli = undef; 1122 struct ureg dist = undef; 1123 1124 count++; 1125 if (p->state->unit[i].light_eyepos3_is_zero) { 1126 VPpli = register_param3(p, STATE_INTERNAL, 1127 STATE_LIGHT_POSITION_NORMALIZED, i); 1128 } else { 1129 struct ureg Ppli = register_param3(p, STATE_INTERNAL, 1130 STATE_LIGHT_POSITION, i); 1131 struct ureg V = get_eye_position(p); 1132 1133 VPpli = get_temp(p); 1134 dist = get_temp(p); 1135 1136 /* Calculate VPpli vector 1137 */ 1138 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1139 1140 /* Normalize VPpli. The dist value also used in 1141 * attenuation below. 1142 */ 1143 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1144 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1145 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1146 } 1147 1148 /* Calculate attenuation: 1149 */ 1150 att = calculate_light_attenuation(p, i, VPpli, dist); 1151 release_temp(p, dist); 1152 1153 /* Calculate viewer direction, or use infinite viewer: 1154 */ 1155 if (!p->state->material_shininess_is_zero) { 1156 if (p->state->light_local_viewer) { 1157 struct ureg eye_hat = get_eye_position_normalized(p); 1158 half = get_temp(p); 1159 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1160 emit_normalize_vec3(p, half, half); 1161 } else if (p->state->unit[i].light_eyepos3_is_zero) { 1162 half = register_param3(p, STATE_INTERNAL, 1163 STATE_LIGHT_HALF_VECTOR, i); 1164 } else { 1165 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1166 half = get_temp(p); 1167 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1168 emit_normalize_vec3(p, half, half); 1169 } 1170 } 1171 1172 /* Calculate dot products: 1173 */ 1174 if (p->state->material_shininess_is_zero) { 1175 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1176 } 1177 else { 1178 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1179 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1180 } 1181 1182 /* Front face lighting: 1183 */ 1184 { 1185 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 1186 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1187 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1188 struct ureg res0, res1; 1189 GLuint mask0, mask1; 1190 1191 if (count == nr_lights) { 1192 if (separate) { 1193 mask0 = WRITEMASK_XYZ; 1194 mask1 = WRITEMASK_XYZ; 1195 res0 = register_output( p, VERT_RESULT_COL0 ); 1196 res1 = register_output( p, VERT_RESULT_COL1 ); 1197 } 1198 else { 1199 mask0 = 0; 1200 mask1 = WRITEMASK_XYZ; 1201 res0 = _col0; 1202 res1 = register_output( p, VERT_RESULT_COL0 ); 1203 } 1204 } 1205 else { 1206 mask0 = 0; 1207 mask1 = 0; 1208 res0 = _col0; 1209 res1 = _col1; 1210 } 1211 1212 if (!is_undef(att)) { 1213 /* light is attenuated by distance */ 1214 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1215 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1216 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1217 } 1218 else if (!p->state->material_shininess_is_zero) { 1219 /* there's a non-zero specular term */ 1220 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1221 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1222 } 1223 else { 1224 /* no attenutation, no specular */ 1225 emit_degenerate_lit(p, lit, dots); 1226 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1227 } 1228 1229 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1230 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1231 1232 release_temp(p, ambient); 1233 release_temp(p, diffuse); 1234 release_temp(p, specular); 1235 } 1236 1237 /* Back face lighting: 1238 */ 1239 if (twoside) { 1240 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1241 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1242 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1243 struct ureg res0, res1; 1244 GLuint mask0, mask1; 1245 1246 if (count == nr_lights) { 1247 if (separate) { 1248 mask0 = WRITEMASK_XYZ; 1249 mask1 = WRITEMASK_XYZ; 1250 res0 = register_output( p, VERT_RESULT_BFC0 ); 1251 res1 = register_output( p, VERT_RESULT_BFC1 ); 1252 } 1253 else { 1254 mask0 = 0; 1255 mask1 = WRITEMASK_XYZ; 1256 res0 = _bfc0; 1257 res1 = register_output( p, VERT_RESULT_BFC0 ); 1258 } 1259 } 1260 else { 1261 res0 = _bfc0; 1262 res1 = _bfc1; 1263 mask0 = 0; 1264 mask1 = 0; 1265 } 1266 1267 /* For the back face we need to negate the X and Y component 1268 * dot products. dots.Z has the negated back-face specular 1269 * exponent. We swizzle that into the W position. This 1270 * negation makes the back-face specular term positive again. 1271 */ 1272 dots = negate(swizzle(dots,X,Y,W,Z)); 1273 1274 if (!is_undef(att)) { 1275 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1276 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1277 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1278 } 1279 else if (!p->state->material_shininess_is_zero) { 1280 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1281 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ 1282 } 1283 else { 1284 emit_degenerate_lit(p, lit, dots); 1285 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1286 } 1287 1288 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1289 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1290 /* restore dots to its original state for subsequent lights 1291 * by negating and swizzling again. 1292 */ 1293 dots = negate(swizzle(dots,X,Y,W,Z)); 1294 1295 release_temp(p, ambient); 1296 release_temp(p, diffuse); 1297 release_temp(p, specular); 1298 } 1299 1300 release_temp(p, half); 1301 release_temp(p, VPpli); 1302 release_temp(p, att); 1303 } 1304 } 1305 1306 release_temps( p ); 1307 } 1308 1309 1310 static void build_fog( struct tnl_program *p ) 1311 { 1312 struct ureg fog = register_output(p, VERT_RESULT_FOGC); 1313 struct ureg input; 1314 1315 if (p->state->fog_source_is_depth) { 1316 1317 switch (p->state->fog_distance_mode) { 1318 case FDM_EYE_RADIAL: /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */ 1319 input = get_eye_position(p); 1320 emit_op2(p, OPCODE_DP3, fog, WRITEMASK_X, input, input); 1321 emit_op1(p, OPCODE_RSQ, fog, WRITEMASK_X, fog); 1322 emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, fog); 1323 break; 1324 case FDM_EYE_PLANE: /* Z = Ze */ 1325 input = get_eye_position_z(p); 1326 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input); 1327 break; 1328 case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */ 1329 input = get_eye_position_z(p); 1330 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1331 break; 1332 default: assert(0); break; /* can't happen */ 1333 } 1334 1335 } 1336 else { 1337 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1338 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1339 } 1340 1341 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); 1342 } 1343 1344 1345 static void build_reflect_texgen( struct tnl_program *p, 1346 struct ureg dest, 1347 GLuint writemask ) 1348 { 1349 struct ureg normal = get_transformed_normal(p); 1350 struct ureg eye_hat = get_eye_position_normalized(p); 1351 struct ureg tmp = get_temp(p); 1352 1353 /* n.u */ 1354 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1355 /* 2n.u */ 1356 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1357 /* (-2n.u)n + u */ 1358 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1359 1360 release_temp(p, tmp); 1361 } 1362 1363 1364 static void build_sphere_texgen( struct tnl_program *p, 1365 struct ureg dest, 1366 GLuint writemask ) 1367 { 1368 struct ureg normal = get_transformed_normal(p); 1369 struct ureg eye_hat = get_eye_position_normalized(p); 1370 struct ureg tmp = get_temp(p); 1371 struct ureg half = register_scalar_const(p, .5); 1372 struct ureg r = get_temp(p); 1373 struct ureg inv_m = get_temp(p); 1374 struct ureg id = get_identity_param(p); 1375 1376 /* Could share the above calculations, but it would be 1377 * a fairly odd state for someone to set (both sphere and 1378 * reflection active for different texture coordinate 1379 * components. Of course - if two texture units enable 1380 * reflect and/or sphere, things start to tilt in favour 1381 * of seperating this out: 1382 */ 1383 1384 /* n.u */ 1385 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1386 /* 2n.u */ 1387 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1388 /* (-2n.u)n + u */ 1389 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1390 /* r + 0,0,1 */ 1391 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1392 /* rx^2 + ry^2 + (rz+1)^2 */ 1393 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1394 /* 2/m */ 1395 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1396 /* 1/m */ 1397 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1398 /* r/m + 1/2 */ 1399 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1400 1401 release_temp(p, tmp); 1402 release_temp(p, r); 1403 release_temp(p, inv_m); 1404 } 1405 1406 1407 static void build_texture_transform( struct tnl_program *p ) 1408 { 1409 GLuint i, j; 1410 1411 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1412 1413 if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i))) 1414 continue; 1415 1416 if (p->state->unit[i].coord_replace) 1417 continue; 1418 1419 if (p->state->unit[i].texgen_enabled || 1420 p->state->unit[i].texmat_enabled) { 1421 1422 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1423 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); 1424 struct ureg out_texgen = undef; 1425 1426 if (p->state->unit[i].texgen_enabled) { 1427 GLuint copy_mask = 0; 1428 GLuint sphere_mask = 0; 1429 GLuint reflect_mask = 0; 1430 GLuint normal_mask = 0; 1431 GLuint modes[4]; 1432 1433 if (texmat_enabled) 1434 out_texgen = get_temp(p); 1435 else 1436 out_texgen = out; 1437 1438 modes[0] = p->state->unit[i].texgen_mode0; 1439 modes[1] = p->state->unit[i].texgen_mode1; 1440 modes[2] = p->state->unit[i].texgen_mode2; 1441 modes[3] = p->state->unit[i].texgen_mode3; 1442 1443 for (j = 0; j < 4; j++) { 1444 switch (modes[j]) { 1445 case TXG_OBJ_LINEAR: { 1446 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1447 struct ureg plane = 1448 register_param3(p, STATE_TEXGEN, i, 1449 STATE_TEXGEN_OBJECT_S + j); 1450 1451 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1452 obj, plane ); 1453 break; 1454 } 1455 case TXG_EYE_LINEAR: { 1456 struct ureg eye = get_eye_position(p); 1457 struct ureg plane = 1458 register_param3(p, STATE_TEXGEN, i, 1459 STATE_TEXGEN_EYE_S + j); 1460 1461 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1462 eye, plane ); 1463 break; 1464 } 1465 case TXG_SPHERE_MAP: 1466 sphere_mask |= WRITEMASK_X << j; 1467 break; 1468 case TXG_REFLECTION_MAP: 1469 reflect_mask |= WRITEMASK_X << j; 1470 break; 1471 case TXG_NORMAL_MAP: 1472 normal_mask |= WRITEMASK_X << j; 1473 break; 1474 case TXG_NONE: 1475 copy_mask |= WRITEMASK_X << j; 1476 } 1477 } 1478 1479 if (sphere_mask) { 1480 build_sphere_texgen(p, out_texgen, sphere_mask); 1481 } 1482 1483 if (reflect_mask) { 1484 build_reflect_texgen(p, out_texgen, reflect_mask); 1485 } 1486 1487 if (normal_mask) { 1488 struct ureg normal = get_transformed_normal(p); 1489 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1490 } 1491 1492 if (copy_mask) { 1493 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1494 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1495 } 1496 } 1497 1498 if (texmat_enabled) { 1499 struct ureg texmat[4]; 1500 struct ureg in = (!is_undef(out_texgen) ? 1501 out_texgen : 1502 register_input(p, VERT_ATTRIB_TEX0+i)); 1503 if (p->mvp_with_dp4) { 1504 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1505 0, texmat ); 1506 emit_matrix_transform_vec4( p, out, texmat, in ); 1507 } 1508 else { 1509 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1510 STATE_MATRIX_TRANSPOSE, texmat ); 1511 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1512 } 1513 } 1514 1515 release_temps(p); 1516 } 1517 else { 1518 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); 1519 } 1520 } 1521 } 1522 1523 1524 /** 1525 * Point size attenuation computation. 1526 */ 1527 static void build_atten_pointsize( struct tnl_program *p ) 1528 { 1529 struct ureg eye = get_eye_position_z(p); 1530 struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED); 1531 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1532 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1533 struct ureg ut = get_temp(p); 1534 1535 /* dist = |eyez| */ 1536 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1537 /* p1 + dist * (p2 + dist * p3); */ 1538 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1539 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1540 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1541 ut, swizzle1(state_attenuation, X)); 1542 1543 /* 1 / sqrt(factor) */ 1544 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1545 1546 #if 0 1547 /* out = pointSize / sqrt(factor) */ 1548 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1549 #else 1550 /* this is a good place to clamp the point size since there's likely 1551 * no hardware registers to clamp point size at rasterization time. 1552 */ 1553 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1554 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1555 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1556 #endif 1557 1558 release_temp(p, ut); 1559 } 1560 1561 1562 /** 1563 * Pass-though per-vertex point size, from user's point size array. 1564 */ 1565 static void build_array_pointsize( struct tnl_program *p ) 1566 { 1567 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1568 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1569 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1570 } 1571 1572 1573 static void build_tnl_program( struct tnl_program *p ) 1574 { 1575 /* Emit the program, starting with the modelview, projection transforms: 1576 */ 1577 build_hpos(p); 1578 1579 /* Lighting calculations: 1580 */ 1581 if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) { 1582 if (p->state->light_global_enabled) 1583 build_lighting(p); 1584 else { 1585 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) 1586 emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); 1587 1588 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) 1589 emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1); 1590 } 1591 } 1592 1593 if (p->state->fragprog_inputs_read & FRAG_BIT_FOGC) 1594 build_fog(p); 1595 1596 if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY) 1597 build_texture_transform(p); 1598 1599 if (p->state->point_attenuated) 1600 build_atten_pointsize(p); 1601 else if (p->state->point_array) 1602 build_array_pointsize(p); 1603 1604 /* Finish up: 1605 */ 1606 emit_op1(p, OPCODE_END, undef, 0, undef); 1607 1608 /* Disassemble: 1609 */ 1610 if (DISASSEM) { 1611 printf ("\n"); 1612 } 1613 } 1614 1615 1616 static void 1617 create_new_program( const struct state_key *key, 1618 struct gl_vertex_program *program, 1619 GLboolean mvp_with_dp4, 1620 GLuint max_temps) 1621 { 1622 struct tnl_program p; 1623 1624 memset(&p, 0, sizeof(p)); 1625 p.state = key; 1626 p.program = program; 1627 p.eye_position = undef; 1628 p.eye_position_z = undef; 1629 p.eye_position_normalized = undef; 1630 p.transformed_normal = undef; 1631 p.identity = undef; 1632 p.temp_in_use = 0; 1633 p.mvp_with_dp4 = mvp_with_dp4; 1634 1635 if (max_temps >= sizeof(int) * 8) 1636 p.temp_reserved = 0; 1637 else 1638 p.temp_reserved = ~((1<<max_temps)-1); 1639 1640 /* Start by allocating 32 instructions. 1641 * If we need more, we'll grow the instruction array as needed. 1642 */ 1643 p.max_inst = 32; 1644 p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst); 1645 p.program->Base.String = NULL; 1646 p.program->Base.NumInstructions = 1647 p.program->Base.NumTemporaries = 1648 p.program->Base.NumParameters = 1649 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1650 p.program->Base.Parameters = _mesa_new_parameter_list(); 1651 p.program->Base.InputsRead = 0; 1652 p.program->Base.OutputsWritten = 0; 1653 1654 build_tnl_program( &p ); 1655 } 1656 1657 1658 /** 1659 * Return a vertex program which implements the current fixed-function 1660 * transform/lighting/texgen operations. 1661 */ 1662 struct gl_vertex_program * 1663 _mesa_get_fixed_func_vertex_program(struct gl_context *ctx) 1664 { 1665 struct gl_vertex_program *prog; 1666 struct state_key key; 1667 1668 /* Grab all the relevent state and put it in a single structure: 1669 */ 1670 make_state_key(ctx, &key); 1671 1672 /* Look for an already-prepared program for this state: 1673 */ 1674 prog = gl_vertex_program( 1675 _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key))); 1676 1677 if (!prog) { 1678 /* OK, we'll have to build a new one */ 1679 if (0) 1680 printf("Build new TNL program\n"); 1681 1682 prog = gl_vertex_program(ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0)); 1683 if (!prog) 1684 return NULL; 1685 1686 create_new_program( &key, prog, 1687 ctx->mvp_with_dp4, 1688 ctx->Const.VertexProgram.MaxTemps ); 1689 1690 #if 0 1691 if (ctx->Driver.ProgramStringNotify) 1692 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 1693 &prog->Base ); 1694 #endif 1695 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, 1696 &key, sizeof(key), &prog->Base); 1697 } 1698 1699 return prog; 1700 } 1701