1 /************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 /** 29 * \file ffvertex_prog.c 30 * 31 * Create a vertex program to execute the current fixed function T&L pipeline. 32 * \author Keith Whitwell 33 */ 34 35 36 #include "main/glheader.h" 37 #include "main/mtypes.h" 38 #include "main/macros.h" 39 #include "main/enums.h" 40 #include "main/ffvertex_prog.h" 41 #include "program/program.h" 42 #include "program/prog_cache.h" 43 #include "program/prog_instruction.h" 44 #include "program/prog_parameter.h" 45 #include "program/prog_print.h" 46 #include "program/prog_statevars.h" 47 #include "util/bitscan.h" 48 49 50 /** Max of number of lights and texture coord units */ 51 #define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) 52 53 struct state_key { 54 unsigned light_color_material_mask:12; 55 unsigned light_global_enabled:1; 56 unsigned light_local_viewer:1; 57 unsigned light_twoside:1; 58 unsigned material_shininess_is_zero:1; 59 unsigned need_eye_coords:1; 60 unsigned normalize:1; 61 unsigned rescale_normals:1; 62 63 unsigned fog_source_is_depth:1; 64 unsigned fog_distance_mode:2; 65 unsigned separate_specular:1; 66 unsigned point_attenuated:1; 67 unsigned point_array:1; 68 unsigned texture_enabled_global:1; 69 unsigned fragprog_inputs_read:12; 70 71 GLbitfield varying_vp_inputs; 72 73 struct { 74 unsigned light_enabled:1; 75 unsigned light_eyepos3_is_zero:1; 76 unsigned light_spotcutoff_is_180:1; 77 unsigned light_attenuated:1; 78 unsigned texunit_really_enabled:1; 79 unsigned texmat_enabled:1; 80 unsigned coord_replace:1; 81 unsigned texgen_enabled:4; 82 unsigned texgen_mode0:4; 83 unsigned texgen_mode1:4; 84 unsigned texgen_mode2:4; 85 unsigned texgen_mode3:4; 86 } unit[NUM_UNITS]; 87 }; 88 89 90 #define TXG_NONE 0 91 #define TXG_OBJ_LINEAR 1 92 #define TXG_EYE_LINEAR 2 93 #define TXG_SPHERE_MAP 3 94 #define TXG_REFLECTION_MAP 4 95 #define TXG_NORMAL_MAP 5 96 97 static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 98 { 99 if (!enabled) 100 return TXG_NONE; 101 102 switch (mode) { 103 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 104 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 105 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 106 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 107 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 108 default: return TXG_NONE; 109 } 110 } 111 112 #define FDM_EYE_RADIAL 0 113 #define FDM_EYE_PLANE 1 114 #define FDM_EYE_PLANE_ABS 2 115 116 static GLuint translate_fog_distance_mode( GLenum mode ) 117 { 118 switch (mode) { 119 case GL_EYE_RADIAL_NV: 120 return FDM_EYE_RADIAL; 121 case GL_EYE_PLANE: 122 return FDM_EYE_PLANE; 123 default: /* shouldn't happen; fall through to a sensible default */ 124 case GL_EYE_PLANE_ABSOLUTE_NV: 125 return FDM_EYE_PLANE_ABS; 126 } 127 } 128 129 static GLboolean check_active_shininess( struct gl_context *ctx, 130 const struct state_key *key, 131 GLuint side ) 132 { 133 GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side; 134 135 if ((key->varying_vp_inputs & VERT_BIT_COLOR0) && 136 (key->light_color_material_mask & (1 << attr))) 137 return GL_TRUE; 138 139 if (key->varying_vp_inputs & VERT_BIT_GENERIC(attr)) 140 return GL_TRUE; 141 142 if (ctx->Light.Material.Attrib[attr][0] != 0.0F) 143 return GL_TRUE; 144 145 return GL_FALSE; 146 } 147 148 149 static void make_state_key( struct gl_context *ctx, struct state_key *key ) 150 { 151 const struct gl_program *fp = ctx->FragmentProgram._Current; 152 GLbitfield mask; 153 154 memset(key, 0, sizeof(struct state_key)); 155 156 /* This now relies on texenvprogram.c being active: 157 */ 158 assert(fp); 159 160 key->need_eye_coords = ctx->_NeedEyeCoords; 161 162 key->fragprog_inputs_read = fp->info.inputs_read; 163 key->varying_vp_inputs = ctx->varying_vp_inputs; 164 165 if (ctx->RenderMode == GL_FEEDBACK) { 166 /* make sure the vertprog emits color and tex0 */ 167 key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0); 168 } 169 170 key->separate_specular = (ctx->Light.Model.ColorControl == 171 GL_SEPARATE_SPECULAR_COLOR); 172 173 if (ctx->Light.Enabled) { 174 key->light_global_enabled = 1; 175 176 if (ctx->Light.Model.LocalViewer) 177 key->light_local_viewer = 1; 178 179 if (ctx->Light.Model.TwoSide) 180 key->light_twoside = 1; 181 182 if (ctx->Light.ColorMaterialEnabled) { 183 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask; 184 } 185 186 mask = ctx->Light._EnabledLights; 187 while (mask) { 188 const int i = u_bit_scan(&mask); 189 struct gl_light *light = &ctx->Light.Light[i]; 190 191 key->unit[i].light_enabled = 1; 192 193 if (light->EyePosition[3] == 0.0F) 194 key->unit[i].light_eyepos3_is_zero = 1; 195 196 if (light->SpotCutoff == 180.0F) 197 key->unit[i].light_spotcutoff_is_180 = 1; 198 199 if (light->ConstantAttenuation != 1.0F || 200 light->LinearAttenuation != 0.0F || 201 light->QuadraticAttenuation != 0.0F) 202 key->unit[i].light_attenuated = 1; 203 } 204 205 if (check_active_shininess(ctx, key, 0)) { 206 key->material_shininess_is_zero = 0; 207 } 208 else if (key->light_twoside && 209 check_active_shininess(ctx, key, 1)) { 210 key->material_shininess_is_zero = 0; 211 } 212 else { 213 key->material_shininess_is_zero = 1; 214 } 215 } 216 217 if (ctx->Transform.Normalize) 218 key->normalize = 1; 219 220 if (ctx->Transform.RescaleNormals) 221 key->rescale_normals = 1; 222 223 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) { 224 key->fog_source_is_depth = 1; 225 key->fog_distance_mode = translate_fog_distance_mode(ctx->Fog.FogDistanceMode); 226 } 227 228 if (ctx->Point._Attenuated) 229 key->point_attenuated = 1; 230 231 if (ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled) 232 key->point_array = 1; 233 234 if (ctx->Texture._TexGenEnabled || 235 ctx->Texture._TexMatEnabled || 236 ctx->Texture._MaxEnabledTexImageUnit != -1) 237 key->texture_enabled_global = 1; 238 239 mask = ctx->Texture._EnabledCoordUnits | ctx->Texture._TexGenEnabled 240 | ctx->Texture._TexMatEnabled | ctx->Point.CoordReplace; 241 while (mask) { 242 const int i = u_bit_scan(&mask); 243 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 244 245 if (texUnit->_Current) 246 key->unit[i].texunit_really_enabled = 1; 247 248 if (ctx->Point.PointSprite) 249 if (ctx->Point.CoordReplace & (1u << i)) 250 key->unit[i].coord_replace = 1; 251 252 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 253 key->unit[i].texmat_enabled = 1; 254 255 if (texUnit->TexGenEnabled) { 256 key->unit[i].texgen_enabled = 1; 257 258 key->unit[i].texgen_mode0 = 259 translate_texgen( texUnit->TexGenEnabled & (1<<0), 260 texUnit->GenS.Mode ); 261 key->unit[i].texgen_mode1 = 262 translate_texgen( texUnit->TexGenEnabled & (1<<1), 263 texUnit->GenT.Mode ); 264 key->unit[i].texgen_mode2 = 265 translate_texgen( texUnit->TexGenEnabled & (1<<2), 266 texUnit->GenR.Mode ); 267 key->unit[i].texgen_mode3 = 268 translate_texgen( texUnit->TexGenEnabled & (1<<3), 269 texUnit->GenQ.Mode ); 270 } 271 } 272 } 273 274 275 276 /* Very useful debugging tool - produces annotated listing of 277 * generated program with line/function references for each 278 * instruction back into this file: 279 */ 280 #define DISASSEM 0 281 282 283 /* Use uregs to represent registers internally, translate to Mesa's 284 * expected formats on emit. 285 * 286 * NOTE: These are passed by value extensively in this file rather 287 * than as usual by pointer reference. If this disturbs you, try 288 * remembering they are just 32bits in size. 289 * 290 * GCC is smart enough to deal with these dword-sized structures in 291 * much the same way as if I had defined them as dwords and was using 292 * macros to access and set the fields. This is much nicer and easier 293 * to evolve. 294 */ 295 struct ureg { 296 GLuint file:4; 297 GLint idx:9; /* relative addressing may be negative */ 298 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 299 GLuint negate:1; 300 GLuint swz:12; 301 GLuint pad:6; 302 }; 303 304 305 struct tnl_program { 306 const struct state_key *state; 307 struct gl_program *program; 308 GLuint max_inst; /** number of instructions allocated for program */ 309 GLboolean mvp_with_dp4; 310 311 GLuint temp_in_use; 312 GLuint temp_reserved; 313 314 struct ureg eye_position; 315 struct ureg eye_position_z; 316 struct ureg eye_position_normalized; 317 struct ureg transformed_normal; 318 struct ureg identity; 319 320 GLuint materials; 321 GLuint color_materials; 322 }; 323 324 325 static const struct ureg undef = { 326 PROGRAM_UNDEFINED, 327 0, 328 0, 329 0, 330 0 331 }; 332 333 /* Local shorthand: 334 */ 335 #define X SWIZZLE_X 336 #define Y SWIZZLE_Y 337 #define Z SWIZZLE_Z 338 #define W SWIZZLE_W 339 340 341 /* Construct a ureg: 342 */ 343 static struct ureg make_ureg(GLuint file, GLint idx) 344 { 345 struct ureg reg; 346 reg.file = file; 347 reg.idx = idx; 348 reg.negate = 0; 349 reg.swz = SWIZZLE_NOOP; 350 reg.pad = 0; 351 return reg; 352 } 353 354 355 static struct ureg negate( struct ureg reg ) 356 { 357 reg.negate ^= 1; 358 return reg; 359 } 360 361 362 static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 363 { 364 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 365 GET_SWZ(reg.swz, y), 366 GET_SWZ(reg.swz, z), 367 GET_SWZ(reg.swz, w)); 368 return reg; 369 } 370 371 372 static struct ureg swizzle1( struct ureg reg, int x ) 373 { 374 return swizzle(reg, x, x, x, x); 375 } 376 377 378 static struct ureg get_temp( struct tnl_program *p ) 379 { 380 int bit = ffs( ~p->temp_in_use ); 381 if (!bit) { 382 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 383 exit(1); 384 } 385 386 if ((GLuint) bit > p->program->arb.NumTemporaries) 387 p->program->arb.NumTemporaries = bit; 388 389 p->temp_in_use |= 1<<(bit-1); 390 return make_ureg(PROGRAM_TEMPORARY, bit-1); 391 } 392 393 394 static struct ureg reserve_temp( struct tnl_program *p ) 395 { 396 struct ureg temp = get_temp( p ); 397 p->temp_reserved |= 1<<temp.idx; 398 return temp; 399 } 400 401 402 static void release_temp( struct tnl_program *p, struct ureg reg ) 403 { 404 if (reg.file == PROGRAM_TEMPORARY) { 405 p->temp_in_use &= ~(1<<reg.idx); 406 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 407 } 408 } 409 410 static void release_temps( struct tnl_program *p ) 411 { 412 p->temp_in_use = p->temp_reserved; 413 } 414 415 416 static struct ureg register_param5(struct tnl_program *p, 417 GLint s0, 418 GLint s1, 419 GLint s2, 420 GLint s3, 421 GLint s4) 422 { 423 gl_state_index tokens[STATE_LENGTH]; 424 GLint idx; 425 tokens[0] = s0; 426 tokens[1] = s1; 427 tokens[2] = s2; 428 tokens[3] = s3; 429 tokens[4] = s4; 430 idx = _mesa_add_state_reference(p->program->Parameters, tokens ); 431 return make_ureg(PROGRAM_STATE_VAR, idx); 432 } 433 434 435 #define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 436 #define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 437 #define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 438 #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 439 440 441 442 /** 443 * \param input one of VERT_ATTRIB_x tokens. 444 */ 445 static struct ureg register_input( struct tnl_program *p, GLuint input ) 446 { 447 assert(input < VERT_ATTRIB_MAX); 448 449 if (p->state->varying_vp_inputs & VERT_BIT(input)) { 450 p->program->info.inputs_read |= VERT_BIT(input); 451 return make_ureg(PROGRAM_INPUT, input); 452 } 453 else { 454 return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input ); 455 } 456 } 457 458 459 /** 460 * \param input one of VARYING_SLOT_x tokens. 461 */ 462 static struct ureg register_output( struct tnl_program *p, GLuint output ) 463 { 464 p->program->info.outputs_written |= BITFIELD64_BIT(output); 465 return make_ureg(PROGRAM_OUTPUT, output); 466 } 467 468 469 static struct ureg register_const4f( struct tnl_program *p, 470 GLfloat s0, 471 GLfloat s1, 472 GLfloat s2, 473 GLfloat s3) 474 { 475 gl_constant_value values[4]; 476 GLint idx; 477 GLuint swizzle; 478 values[0].f = s0; 479 values[1].f = s1; 480 values[2].f = s2; 481 values[3].f = s3; 482 idx = _mesa_add_unnamed_constant(p->program->Parameters, values, 4, 483 &swizzle ); 484 assert(swizzle == SWIZZLE_NOOP); 485 return make_ureg(PROGRAM_CONSTANT, idx); 486 } 487 488 #define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 489 #define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 490 #define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 491 #define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 492 493 static GLboolean is_undef( struct ureg reg ) 494 { 495 return reg.file == PROGRAM_UNDEFINED; 496 } 497 498 499 static struct ureg get_identity_param( struct tnl_program *p ) 500 { 501 if (is_undef(p->identity)) 502 p->identity = register_const4f(p, 0,0,0,1); 503 504 return p->identity; 505 } 506 507 static void register_matrix_param5( struct tnl_program *p, 508 GLint s0, /* modelview, projection, etc */ 509 GLint s1, /* texture matrix number */ 510 GLint s2, /* first row */ 511 GLint s3, /* last row */ 512 GLint s4, /* inverse, transpose, etc */ 513 struct ureg *matrix ) 514 { 515 GLint i; 516 517 /* This is a bit sad as the support is there to pull the whole 518 * matrix out in one go: 519 */ 520 for (i = 0; i <= s3 - s2; i++) 521 matrix[i] = register_param5( p, s0, s1, i, i, s4 ); 522 } 523 524 525 static void emit_arg( struct prog_src_register *src, 526 struct ureg reg ) 527 { 528 src->File = reg.file; 529 src->Index = reg.idx; 530 src->Swizzle = reg.swz; 531 src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; 532 src->RelAddr = 0; 533 /* Check that bitfield sizes aren't exceeded */ 534 assert(src->Index == reg.idx); 535 } 536 537 538 static void emit_dst( struct prog_dst_register *dst, 539 struct ureg reg, GLuint mask ) 540 { 541 dst->File = reg.file; 542 dst->Index = reg.idx; 543 /* allow zero as a shorthand for xyzw */ 544 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 545 /* Check that bitfield sizes aren't exceeded */ 546 assert(dst->Index == reg.idx); 547 } 548 549 550 static void debug_insn( struct prog_instruction *inst, const char *fn, 551 GLuint line ) 552 { 553 if (DISASSEM) { 554 static const char *last_fn; 555 556 if (fn != last_fn) { 557 last_fn = fn; 558 printf("%s:\n", fn); 559 } 560 561 printf("%d:\t", line); 562 _mesa_print_instruction(inst); 563 } 564 } 565 566 567 static void emit_op3fn(struct tnl_program *p, 568 enum prog_opcode op, 569 struct ureg dest, 570 GLuint mask, 571 struct ureg src0, 572 struct ureg src1, 573 struct ureg src2, 574 const char *fn, 575 GLuint line) 576 { 577 GLuint nr; 578 struct prog_instruction *inst; 579 580 assert(p->program->arb.NumInstructions <= p->max_inst); 581 582 if (p->program->arb.NumInstructions == p->max_inst) { 583 /* need to extend the program's instruction array */ 584 struct prog_instruction *newInst; 585 586 /* double the size */ 587 p->max_inst *= 2; 588 589 newInst = 590 rzalloc_array(p->program, struct prog_instruction, p->max_inst); 591 if (!newInst) { 592 _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); 593 return; 594 } 595 596 _mesa_copy_instructions(newInst, p->program->arb.Instructions, 597 p->program->arb.NumInstructions); 598 599 ralloc_free(p->program->arb.Instructions); 600 601 p->program->arb.Instructions = newInst; 602 } 603 604 nr = p->program->arb.NumInstructions++; 605 606 inst = &p->program->arb.Instructions[nr]; 607 inst->Opcode = (enum prog_opcode) op; 608 609 emit_arg( &inst->SrcReg[0], src0 ); 610 emit_arg( &inst->SrcReg[1], src1 ); 611 emit_arg( &inst->SrcReg[2], src2 ); 612 613 emit_dst( &inst->DstReg, dest, mask ); 614 615 debug_insn(inst, fn, line); 616 } 617 618 619 #define emit_op3(p, op, dst, mask, src0, src1, src2) \ 620 emit_op3fn(p, op, dst, mask, src0, src1, src2, __func__, __LINE__) 621 622 #define emit_op2(p, op, dst, mask, src0, src1) \ 623 emit_op3fn(p, op, dst, mask, src0, src1, undef, __func__, __LINE__) 624 625 #define emit_op1(p, op, dst, mask, src0) \ 626 emit_op3fn(p, op, dst, mask, src0, undef, undef, __func__, __LINE__) 627 628 629 static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 630 { 631 if (reg.file == PROGRAM_TEMPORARY && 632 !(p->temp_reserved & (1<<reg.idx))) 633 return reg; 634 else { 635 struct ureg temp = get_temp(p); 636 emit_op1(p, OPCODE_MOV, temp, 0, reg); 637 return temp; 638 } 639 } 640 641 642 /* Currently no tracking performed of input/output/register size or 643 * active elements. Could be used to reduce these operations, as 644 * could the matrix type. 645 */ 646 static void emit_matrix_transform_vec4( struct tnl_program *p, 647 struct ureg dest, 648 const struct ureg *mat, 649 struct ureg src) 650 { 651 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 652 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 653 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 654 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 655 } 656 657 658 /* This version is much easier to implement if writemasks are not 659 * supported natively on the target or (like SSE), the target doesn't 660 * have a clean/obvious dotproduct implementation. 661 */ 662 static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 663 struct ureg dest, 664 const struct ureg *mat, 665 struct ureg src) 666 { 667 struct ureg tmp; 668 669 if (dest.file != PROGRAM_TEMPORARY) 670 tmp = get_temp(p); 671 else 672 tmp = dest; 673 674 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 675 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 676 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 677 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 678 679 if (dest.file != PROGRAM_TEMPORARY) 680 release_temp(p, tmp); 681 } 682 683 684 static void emit_matrix_transform_vec3( struct tnl_program *p, 685 struct ureg dest, 686 const struct ureg *mat, 687 struct ureg src) 688 { 689 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 690 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 691 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 692 } 693 694 695 static void emit_normalize_vec3( struct tnl_program *p, 696 struct ureg dest, 697 struct ureg src ) 698 { 699 struct ureg tmp = get_temp(p); 700 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 701 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 702 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); 703 release_temp(p, tmp); 704 } 705 706 707 static void emit_passthrough( struct tnl_program *p, 708 GLuint input, 709 GLuint output ) 710 { 711 struct ureg out = register_output(p, output); 712 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 713 } 714 715 716 static struct ureg get_eye_position( struct tnl_program *p ) 717 { 718 if (is_undef(p->eye_position)) { 719 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 720 struct ureg modelview[4]; 721 722 p->eye_position = reserve_temp(p); 723 724 if (p->mvp_with_dp4) { 725 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 726 0, modelview ); 727 728 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 729 } 730 else { 731 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 732 STATE_MATRIX_TRANSPOSE, modelview ); 733 734 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 735 } 736 } 737 738 return p->eye_position; 739 } 740 741 742 static struct ureg get_eye_position_z( struct tnl_program *p ) 743 { 744 if (!is_undef(p->eye_position)) 745 return swizzle1(p->eye_position, Z); 746 747 if (is_undef(p->eye_position_z)) { 748 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 749 struct ureg modelview[4]; 750 751 p->eye_position_z = reserve_temp(p); 752 753 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 754 0, modelview ); 755 756 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 757 } 758 759 return p->eye_position_z; 760 } 761 762 763 static struct ureg get_eye_position_normalized( struct tnl_program *p ) 764 { 765 if (is_undef(p->eye_position_normalized)) { 766 struct ureg eye = get_eye_position(p); 767 p->eye_position_normalized = reserve_temp(p); 768 emit_normalize_vec3(p, p->eye_position_normalized, eye); 769 } 770 771 return p->eye_position_normalized; 772 } 773 774 775 static struct ureg get_transformed_normal( struct tnl_program *p ) 776 { 777 if (is_undef(p->transformed_normal) && 778 !p->state->need_eye_coords && 779 !p->state->normalize && 780 !(p->state->need_eye_coords == p->state->rescale_normals)) 781 { 782 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 783 } 784 else if (is_undef(p->transformed_normal)) 785 { 786 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 787 struct ureg mvinv[3]; 788 struct ureg transformed_normal = reserve_temp(p); 789 790 if (p->state->need_eye_coords) { 791 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, 792 STATE_MATRIX_INVTRANS, mvinv ); 793 794 /* Transform to eye space: 795 */ 796 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 797 normal = transformed_normal; 798 } 799 800 /* Normalize/Rescale: 801 */ 802 if (p->state->normalize) { 803 emit_normalize_vec3( p, transformed_normal, normal ); 804 normal = transformed_normal; 805 } 806 else if (p->state->need_eye_coords == p->state->rescale_normals) { 807 /* This is already adjusted for eye/non-eye rendering: 808 */ 809 struct ureg rescale = register_param2(p, STATE_INTERNAL, 810 STATE_NORMAL_SCALE); 811 812 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 813 normal = transformed_normal; 814 } 815 816 assert(normal.file == PROGRAM_TEMPORARY); 817 p->transformed_normal = normal; 818 } 819 820 return p->transformed_normal; 821 } 822 823 824 static void build_hpos( struct tnl_program *p ) 825 { 826 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 827 struct ureg hpos = register_output( p, VARYING_SLOT_POS ); 828 struct ureg mvp[4]; 829 830 if (p->mvp_with_dp4) { 831 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 832 0, mvp ); 833 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 834 } 835 else { 836 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 837 STATE_MATRIX_TRANSPOSE, mvp ); 838 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 839 } 840 } 841 842 843 static GLuint material_attrib( GLuint side, GLuint property ) 844 { 845 return (property - STATE_AMBIENT) * 2 + side; 846 } 847 848 849 /** 850 * Get a bitmask of which material values vary on a per-vertex basis. 851 */ 852 static void set_material_flags( struct tnl_program *p ) 853 { 854 p->color_materials = 0; 855 p->materials = 0; 856 857 if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) { 858 p->materials = 859 p->color_materials = p->state->light_color_material_mask; 860 } 861 862 p->materials |= (p->state->varying_vp_inputs >> VERT_ATTRIB_GENERIC0); 863 } 864 865 866 static struct ureg get_material( struct tnl_program *p, GLuint side, 867 GLuint property ) 868 { 869 GLuint attrib = material_attrib(side, property); 870 871 if (p->color_materials & (1<<attrib)) 872 return register_input(p, VERT_ATTRIB_COLOR0); 873 else if (p->materials & (1<<attrib)) { 874 /* Put material values in the GENERIC slots -- they are not used 875 * for anything in fixed function mode. 876 */ 877 return register_input( p, attrib + VERT_ATTRIB_GENERIC0 ); 878 } 879 else 880 return register_param3( p, STATE_MATERIAL, side, property ); 881 } 882 883 #define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 884 MAT_BIT_FRONT_AMBIENT | \ 885 MAT_BIT_FRONT_DIFFUSE) << (side)) 886 887 888 /** 889 * Either return a precalculated constant value or emit code to 890 * calculate these values dynamically in the case where material calls 891 * are present between begin/end pairs. 892 * 893 * Probably want to shift this to the program compilation phase - if 894 * we always emitted the calculation here, a smart compiler could 895 * detect that it was constant (given a certain set of inputs), and 896 * lift it out of the main loop. That way the programs created here 897 * would be independent of the vertex_buffer details. 898 */ 899 static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 900 { 901 if (p->materials & SCENE_COLOR_BITS(side)) { 902 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 903 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 904 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 905 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 906 struct ureg tmp = make_temp(p, material_diffuse); 907 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 908 material_ambient, material_emission); 909 return tmp; 910 } 911 else 912 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 913 } 914 915 916 static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 917 GLuint side, GLuint property ) 918 { 919 GLuint attrib = material_attrib(side, property); 920 if (p->materials & (1<<attrib)) { 921 struct ureg light_value = 922 register_param3(p, STATE_LIGHT, light, property); 923 struct ureg material_value = get_material(p, side, property); 924 struct ureg tmp = get_temp(p); 925 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 926 return tmp; 927 } 928 else 929 return register_param4(p, STATE_LIGHTPROD, light, side, property); 930 } 931 932 933 static struct ureg calculate_light_attenuation( struct tnl_program *p, 934 GLuint i, 935 struct ureg VPpli, 936 struct ureg dist ) 937 { 938 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 939 STATE_ATTENUATION); 940 struct ureg att = undef; 941 942 /* Calculate spot attenuation: 943 */ 944 if (!p->state->unit[i].light_spotcutoff_is_180) { 945 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 946 STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 947 struct ureg spot = get_temp(p); 948 struct ureg slt = get_temp(p); 949 950 att = get_temp(p); 951 952 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 953 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 954 emit_op1(p, OPCODE_ABS, spot, 0, spot); 955 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 956 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 957 958 release_temp(p, spot); 959 release_temp(p, slt); 960 } 961 962 /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62): 963 * 964 * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero) 965 */ 966 if (p->state->unit[i].light_attenuated && !is_undef(dist)) { 967 if (is_undef(att)) 968 att = get_temp(p); 969 /* 1/d,d,d,1/d */ 970 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 971 /* 1,d,d*d,1/d */ 972 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 973 /* 1/dist-atten */ 974 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 975 976 if (!p->state->unit[i].light_spotcutoff_is_180) { 977 /* dist-atten */ 978 emit_op1(p, OPCODE_RCP, dist, 0, dist); 979 /* spot-atten * dist-atten */ 980 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 981 } 982 else { 983 /* dist-atten */ 984 emit_op1(p, OPCODE_RCP, att, 0, dist); 985 } 986 } 987 988 return att; 989 } 990 991 992 /** 993 * Compute: 994 * lit.y = MAX(0, dots.x) 995 * lit.z = SLT(0, dots.x) 996 */ 997 static void emit_degenerate_lit( struct tnl_program *p, 998 struct ureg lit, 999 struct ureg dots ) 1000 { 1001 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 1002 1003 /* Note that lit.x & lit.w will not be examined. Note also that 1004 * dots.xyzw == dots.xxxx. 1005 */ 1006 1007 /* MAX lit, id, dots; 1008 */ 1009 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 1010 1011 /* result[2] = (in > 0 ? 1 : 0) 1012 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 1013 */ 1014 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 1015 } 1016 1017 1018 /* Need to add some addtional parameters to allow lighting in object 1019 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1020 * space lighting. 1021 */ 1022 static void build_lighting( struct tnl_program *p ) 1023 { 1024 const GLboolean twoside = p->state->light_twoside; 1025 const GLboolean separate = p->state->separate_specular; 1026 GLuint nr_lights = 0, count = 0; 1027 struct ureg normal = get_transformed_normal(p); 1028 struct ureg lit = get_temp(p); 1029 struct ureg dots = get_temp(p); 1030 struct ureg _col0 = undef, _col1 = undef; 1031 struct ureg _bfc0 = undef, _bfc1 = undef; 1032 GLuint i; 1033 1034 /* 1035 * NOTE: 1036 * dots.x = dot(normal, VPpli) 1037 * dots.y = dot(normal, halfAngle) 1038 * dots.z = back.shininess 1039 * dots.w = front.shininess 1040 */ 1041 1042 for (i = 0; i < MAX_LIGHTS; i++) 1043 if (p->state->unit[i].light_enabled) 1044 nr_lights++; 1045 1046 set_material_flags(p); 1047 1048 { 1049 if (!p->state->material_shininess_is_zero) { 1050 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1051 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1052 release_temp(p, shininess); 1053 } 1054 1055 _col0 = make_temp(p, get_scenecolor(p, 0)); 1056 if (separate) 1057 _col1 = make_temp(p, get_identity_param(p)); 1058 else 1059 _col1 = _col0; 1060 } 1061 1062 if (twoside) { 1063 if (!p->state->material_shininess_is_zero) { 1064 /* Note that we negate the back-face specular exponent here. 1065 * The negation will be un-done later in the back-face code below. 1066 */ 1067 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1068 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1069 negate(swizzle1(shininess,X))); 1070 release_temp(p, shininess); 1071 } 1072 1073 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1074 if (separate) 1075 _bfc1 = make_temp(p, get_identity_param(p)); 1076 else 1077 _bfc1 = _bfc0; 1078 } 1079 1080 /* If no lights, still need to emit the scenecolor. 1081 */ 1082 { 1083 struct ureg res0 = register_output( p, VARYING_SLOT_COL0 ); 1084 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1085 } 1086 1087 if (separate) { 1088 struct ureg res1 = register_output( p, VARYING_SLOT_COL1 ); 1089 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1090 } 1091 1092 if (twoside) { 1093 struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 ); 1094 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1095 } 1096 1097 if (twoside && separate) { 1098 struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 ); 1099 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1100 } 1101 1102 if (nr_lights == 0) { 1103 release_temps(p); 1104 return; 1105 } 1106 1107 for (i = 0; i < MAX_LIGHTS; i++) { 1108 if (p->state->unit[i].light_enabled) { 1109 struct ureg half = undef; 1110 struct ureg att = undef, VPpli = undef; 1111 struct ureg dist = undef; 1112 1113 count++; 1114 if (p->state->unit[i].light_eyepos3_is_zero) { 1115 VPpli = register_param3(p, STATE_INTERNAL, 1116 STATE_LIGHT_POSITION_NORMALIZED, i); 1117 } else { 1118 struct ureg Ppli = register_param3(p, STATE_INTERNAL, 1119 STATE_LIGHT_POSITION, i); 1120 struct ureg V = get_eye_position(p); 1121 1122 VPpli = get_temp(p); 1123 dist = get_temp(p); 1124 1125 /* Calculate VPpli vector 1126 */ 1127 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1128 1129 /* Normalize VPpli. The dist value also used in 1130 * attenuation below. 1131 */ 1132 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1133 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1134 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1135 } 1136 1137 /* Calculate attenuation: 1138 */ 1139 att = calculate_light_attenuation(p, i, VPpli, dist); 1140 release_temp(p, dist); 1141 1142 /* Calculate viewer direction, or use infinite viewer: 1143 */ 1144 if (!p->state->material_shininess_is_zero) { 1145 if (p->state->light_local_viewer) { 1146 struct ureg eye_hat = get_eye_position_normalized(p); 1147 half = get_temp(p); 1148 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1149 emit_normalize_vec3(p, half, half); 1150 } else if (p->state->unit[i].light_eyepos3_is_zero) { 1151 half = register_param3(p, STATE_INTERNAL, 1152 STATE_LIGHT_HALF_VECTOR, i); 1153 } else { 1154 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1155 half = get_temp(p); 1156 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1157 emit_normalize_vec3(p, half, half); 1158 } 1159 } 1160 1161 /* Calculate dot products: 1162 */ 1163 if (p->state->material_shininess_is_zero) { 1164 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1165 } 1166 else { 1167 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1168 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1169 } 1170 1171 /* Front face lighting: 1172 */ 1173 { 1174 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 1175 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1176 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1177 struct ureg res0, res1; 1178 GLuint mask0, mask1; 1179 1180 if (count == nr_lights) { 1181 if (separate) { 1182 mask0 = WRITEMASK_XYZ; 1183 mask1 = WRITEMASK_XYZ; 1184 res0 = register_output( p, VARYING_SLOT_COL0 ); 1185 res1 = register_output( p, VARYING_SLOT_COL1 ); 1186 } 1187 else { 1188 mask0 = 0; 1189 mask1 = WRITEMASK_XYZ; 1190 res0 = _col0; 1191 res1 = register_output( p, VARYING_SLOT_COL0 ); 1192 } 1193 } 1194 else { 1195 mask0 = 0; 1196 mask1 = 0; 1197 res0 = _col0; 1198 res1 = _col1; 1199 } 1200 1201 if (!is_undef(att)) { 1202 /* light is attenuated by distance */ 1203 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1204 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1205 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1206 } 1207 else if (!p->state->material_shininess_is_zero) { 1208 /* there's a non-zero specular term */ 1209 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1210 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1211 } 1212 else { 1213 /* no attenutation, no specular */ 1214 emit_degenerate_lit(p, lit, dots); 1215 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1216 } 1217 1218 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1219 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1220 1221 release_temp(p, ambient); 1222 release_temp(p, diffuse); 1223 release_temp(p, specular); 1224 } 1225 1226 /* Back face lighting: 1227 */ 1228 if (twoside) { 1229 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1230 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1231 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1232 struct ureg res0, res1; 1233 GLuint mask0, mask1; 1234 1235 if (count == nr_lights) { 1236 if (separate) { 1237 mask0 = WRITEMASK_XYZ; 1238 mask1 = WRITEMASK_XYZ; 1239 res0 = register_output( p, VARYING_SLOT_BFC0 ); 1240 res1 = register_output( p, VARYING_SLOT_BFC1 ); 1241 } 1242 else { 1243 mask0 = 0; 1244 mask1 = WRITEMASK_XYZ; 1245 res0 = _bfc0; 1246 res1 = register_output( p, VARYING_SLOT_BFC0 ); 1247 } 1248 } 1249 else { 1250 res0 = _bfc0; 1251 res1 = _bfc1; 1252 mask0 = 0; 1253 mask1 = 0; 1254 } 1255 1256 /* For the back face we need to negate the X and Y component 1257 * dot products. dots.Z has the negated back-face specular 1258 * exponent. We swizzle that into the W position. This 1259 * negation makes the back-face specular term positive again. 1260 */ 1261 dots = negate(swizzle(dots,X,Y,W,Z)); 1262 1263 if (!is_undef(att)) { 1264 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1265 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1266 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1267 } 1268 else if (!p->state->material_shininess_is_zero) { 1269 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1270 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ 1271 } 1272 else { 1273 emit_degenerate_lit(p, lit, dots); 1274 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1275 } 1276 1277 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1278 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1279 /* restore dots to its original state for subsequent lights 1280 * by negating and swizzling again. 1281 */ 1282 dots = negate(swizzle(dots,X,Y,W,Z)); 1283 1284 release_temp(p, ambient); 1285 release_temp(p, diffuse); 1286 release_temp(p, specular); 1287 } 1288 1289 release_temp(p, half); 1290 release_temp(p, VPpli); 1291 release_temp(p, att); 1292 } 1293 } 1294 1295 release_temps( p ); 1296 } 1297 1298 1299 static void build_fog( struct tnl_program *p ) 1300 { 1301 struct ureg fog = register_output(p, VARYING_SLOT_FOGC); 1302 struct ureg input; 1303 1304 if (p->state->fog_source_is_depth) { 1305 1306 switch (p->state->fog_distance_mode) { 1307 case FDM_EYE_RADIAL: /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */ 1308 input = get_eye_position(p); 1309 emit_op2(p, OPCODE_DP3, fog, WRITEMASK_X, input, input); 1310 emit_op1(p, OPCODE_RSQ, fog, WRITEMASK_X, fog); 1311 emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, fog); 1312 break; 1313 case FDM_EYE_PLANE: /* Z = Ze */ 1314 input = get_eye_position_z(p); 1315 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input); 1316 break; 1317 case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */ 1318 input = get_eye_position_z(p); 1319 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1320 break; 1321 default: 1322 assert(!"Bad fog mode in build_fog()"); 1323 break; 1324 } 1325 1326 } 1327 else { 1328 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1329 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1330 } 1331 1332 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); 1333 } 1334 1335 1336 static void build_reflect_texgen( struct tnl_program *p, 1337 struct ureg dest, 1338 GLuint writemask ) 1339 { 1340 struct ureg normal = get_transformed_normal(p); 1341 struct ureg eye_hat = get_eye_position_normalized(p); 1342 struct ureg tmp = get_temp(p); 1343 1344 /* n.u */ 1345 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1346 /* 2n.u */ 1347 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1348 /* (-2n.u)n + u */ 1349 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1350 1351 release_temp(p, tmp); 1352 } 1353 1354 1355 static void build_sphere_texgen( struct tnl_program *p, 1356 struct ureg dest, 1357 GLuint writemask ) 1358 { 1359 struct ureg normal = get_transformed_normal(p); 1360 struct ureg eye_hat = get_eye_position_normalized(p); 1361 struct ureg tmp = get_temp(p); 1362 struct ureg half = register_scalar_const(p, .5); 1363 struct ureg r = get_temp(p); 1364 struct ureg inv_m = get_temp(p); 1365 struct ureg id = get_identity_param(p); 1366 1367 /* Could share the above calculations, but it would be 1368 * a fairly odd state for someone to set (both sphere and 1369 * reflection active for different texture coordinate 1370 * components. Of course - if two texture units enable 1371 * reflect and/or sphere, things start to tilt in favour 1372 * of seperating this out: 1373 */ 1374 1375 /* n.u */ 1376 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1377 /* 2n.u */ 1378 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1379 /* (-2n.u)n + u */ 1380 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1381 /* r + 0,0,1 */ 1382 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1383 /* rx^2 + ry^2 + (rz+1)^2 */ 1384 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1385 /* 2/m */ 1386 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1387 /* 1/m */ 1388 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1389 /* r/m + 1/2 */ 1390 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1391 1392 release_temp(p, tmp); 1393 release_temp(p, r); 1394 release_temp(p, inv_m); 1395 } 1396 1397 1398 static void build_texture_transform( struct tnl_program *p ) 1399 { 1400 GLuint i, j; 1401 1402 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1403 1404 if (!(p->state->fragprog_inputs_read & VARYING_BIT_TEX(i))) 1405 continue; 1406 1407 if (p->state->unit[i].coord_replace) 1408 continue; 1409 1410 if (p->state->unit[i].texgen_enabled || 1411 p->state->unit[i].texmat_enabled) { 1412 1413 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1414 struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i); 1415 struct ureg out_texgen = undef; 1416 1417 if (p->state->unit[i].texgen_enabled) { 1418 GLuint copy_mask = 0; 1419 GLuint sphere_mask = 0; 1420 GLuint reflect_mask = 0; 1421 GLuint normal_mask = 0; 1422 GLuint modes[4]; 1423 1424 if (texmat_enabled) 1425 out_texgen = get_temp(p); 1426 else 1427 out_texgen = out; 1428 1429 modes[0] = p->state->unit[i].texgen_mode0; 1430 modes[1] = p->state->unit[i].texgen_mode1; 1431 modes[2] = p->state->unit[i].texgen_mode2; 1432 modes[3] = p->state->unit[i].texgen_mode3; 1433 1434 for (j = 0; j < 4; j++) { 1435 switch (modes[j]) { 1436 case TXG_OBJ_LINEAR: { 1437 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1438 struct ureg plane = 1439 register_param3(p, STATE_TEXGEN, i, 1440 STATE_TEXGEN_OBJECT_S + j); 1441 1442 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1443 obj, plane ); 1444 break; 1445 } 1446 case TXG_EYE_LINEAR: { 1447 struct ureg eye = get_eye_position(p); 1448 struct ureg plane = 1449 register_param3(p, STATE_TEXGEN, i, 1450 STATE_TEXGEN_EYE_S + j); 1451 1452 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1453 eye, plane ); 1454 break; 1455 } 1456 case TXG_SPHERE_MAP: 1457 sphere_mask |= WRITEMASK_X << j; 1458 break; 1459 case TXG_REFLECTION_MAP: 1460 reflect_mask |= WRITEMASK_X << j; 1461 break; 1462 case TXG_NORMAL_MAP: 1463 normal_mask |= WRITEMASK_X << j; 1464 break; 1465 case TXG_NONE: 1466 copy_mask |= WRITEMASK_X << j; 1467 } 1468 } 1469 1470 if (sphere_mask) { 1471 build_sphere_texgen(p, out_texgen, sphere_mask); 1472 } 1473 1474 if (reflect_mask) { 1475 build_reflect_texgen(p, out_texgen, reflect_mask); 1476 } 1477 1478 if (normal_mask) { 1479 struct ureg normal = get_transformed_normal(p); 1480 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1481 } 1482 1483 if (copy_mask) { 1484 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1485 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1486 } 1487 } 1488 1489 if (texmat_enabled) { 1490 struct ureg texmat[4]; 1491 struct ureg in = (!is_undef(out_texgen) ? 1492 out_texgen : 1493 register_input(p, VERT_ATTRIB_TEX0+i)); 1494 if (p->mvp_with_dp4) { 1495 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1496 0, texmat ); 1497 emit_matrix_transform_vec4( p, out, texmat, in ); 1498 } 1499 else { 1500 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1501 STATE_MATRIX_TRANSPOSE, texmat ); 1502 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1503 } 1504 } 1505 1506 release_temps(p); 1507 } 1508 else { 1509 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i); 1510 } 1511 } 1512 } 1513 1514 1515 /** 1516 * Point size attenuation computation. 1517 */ 1518 static void build_atten_pointsize( struct tnl_program *p ) 1519 { 1520 struct ureg eye = get_eye_position_z(p); 1521 struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED); 1522 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1523 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1524 struct ureg ut = get_temp(p); 1525 1526 /* dist = |eyez| */ 1527 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1528 /* p1 + dist * (p2 + dist * p3); */ 1529 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1530 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1531 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1532 ut, swizzle1(state_attenuation, X)); 1533 1534 /* 1 / sqrt(factor) */ 1535 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1536 1537 #if 0 1538 /* out = pointSize / sqrt(factor) */ 1539 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1540 #else 1541 /* this is a good place to clamp the point size since there's likely 1542 * no hardware registers to clamp point size at rasterization time. 1543 */ 1544 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1545 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1546 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1547 #endif 1548 1549 release_temp(p, ut); 1550 } 1551 1552 1553 /** 1554 * Pass-though per-vertex point size, from user's point size array. 1555 */ 1556 static void build_array_pointsize( struct tnl_program *p ) 1557 { 1558 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1559 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1560 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1561 } 1562 1563 1564 static void build_tnl_program( struct tnl_program *p ) 1565 { 1566 /* Emit the program, starting with the modelview, projection transforms: 1567 */ 1568 build_hpos(p); 1569 1570 /* Lighting calculations: 1571 */ 1572 if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) { 1573 if (p->state->light_global_enabled) 1574 build_lighting(p); 1575 else { 1576 if (p->state->fragprog_inputs_read & VARYING_BIT_COL0) 1577 emit_passthrough(p, VERT_ATTRIB_COLOR0, VARYING_SLOT_COL0); 1578 1579 if (p->state->fragprog_inputs_read & VARYING_BIT_COL1) 1580 emit_passthrough(p, VERT_ATTRIB_COLOR1, VARYING_SLOT_COL1); 1581 } 1582 } 1583 1584 if (p->state->fragprog_inputs_read & VARYING_BIT_FOGC) 1585 build_fog(p); 1586 1587 if (p->state->fragprog_inputs_read & VARYING_BITS_TEX_ANY) 1588 build_texture_transform(p); 1589 1590 if (p->state->point_attenuated) 1591 build_atten_pointsize(p); 1592 else if (p->state->point_array) 1593 build_array_pointsize(p); 1594 1595 /* Finish up: 1596 */ 1597 emit_op1(p, OPCODE_END, undef, 0, undef); 1598 1599 /* Disassemble: 1600 */ 1601 if (DISASSEM) { 1602 printf ("\n"); 1603 } 1604 } 1605 1606 1607 static void 1608 create_new_program( const struct state_key *key, 1609 struct gl_program *program, 1610 GLboolean mvp_with_dp4, 1611 GLuint max_temps) 1612 { 1613 struct tnl_program p; 1614 1615 memset(&p, 0, sizeof(p)); 1616 p.state = key; 1617 p.program = program; 1618 p.eye_position = undef; 1619 p.eye_position_z = undef; 1620 p.eye_position_normalized = undef; 1621 p.transformed_normal = undef; 1622 p.identity = undef; 1623 p.temp_in_use = 0; 1624 p.mvp_with_dp4 = mvp_with_dp4; 1625 1626 if (max_temps >= sizeof(int) * 8) 1627 p.temp_reserved = 0; 1628 else 1629 p.temp_reserved = ~((1<<max_temps)-1); 1630 1631 /* Start by allocating 32 instructions. 1632 * If we need more, we'll grow the instruction array as needed. 1633 */ 1634 p.max_inst = 32; 1635 p.program->arb.Instructions = 1636 rzalloc_array(program, struct prog_instruction, p.max_inst); 1637 p.program->String = NULL; 1638 p.program->arb.NumInstructions = 1639 p.program->arb.NumTemporaries = 1640 p.program->arb.NumParameters = 1641 p.program->arb.NumAttributes = p.program->arb.NumAddressRegs = 0; 1642 p.program->Parameters = _mesa_new_parameter_list(); 1643 p.program->info.inputs_read = 0; 1644 p.program->info.outputs_written = 0; 1645 1646 build_tnl_program( &p ); 1647 } 1648 1649 1650 /** 1651 * Return a vertex program which implements the current fixed-function 1652 * transform/lighting/texgen operations. 1653 */ 1654 struct gl_program * 1655 _mesa_get_fixed_func_vertex_program(struct gl_context *ctx) 1656 { 1657 struct gl_program *prog; 1658 struct state_key key; 1659 1660 /* Grab all the relevant state and put it in a single structure: 1661 */ 1662 make_state_key(ctx, &key); 1663 1664 /* Look for an already-prepared program for this state: 1665 */ 1666 prog = _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, 1667 sizeof(key)); 1668 1669 if (!prog) { 1670 /* OK, we'll have to build a new one */ 1671 if (0) 1672 printf("Build new TNL program\n"); 1673 1674 prog = ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0, true); 1675 if (!prog) 1676 return NULL; 1677 1678 create_new_program( &key, prog, 1679 ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS, 1680 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps ); 1681 1682 if (ctx->Driver.ProgramStringNotify) 1683 ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, prog); 1684 1685 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, &key, 1686 sizeof(key), prog); 1687 } 1688 1689 return prog; 1690 } 1691