Home | History | Annotate | Download | only in main
      1 /**************************************************************************
      2  *
      3  * Copyright 2007 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 /**
     29  * \file ffvertex_prog.c
     30  *
     31  * Create a vertex program to execute the current fixed function T&L pipeline.
     32  * \author Keith Whitwell
     33  */
     34 
     35 
     36 #include "main/glheader.h"
     37 #include "main/mtypes.h"
     38 #include "main/macros.h"
     39 #include "main/enums.h"
     40 #include "main/ffvertex_prog.h"
     41 #include "program/program.h"
     42 #include "program/prog_cache.h"
     43 #include "program/prog_instruction.h"
     44 #include "program/prog_parameter.h"
     45 #include "program/prog_print.h"
     46 #include "program/prog_statevars.h"
     47 #include "util/bitscan.h"
     48 
     49 
/** Max of number of lights and texture coord units */
#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS)

/**
 * Compact summary of the fixed-function state that make_state_key()
 * samples from the GL context.  make_state_key() zeroes the whole
 * structure with memset() before filling it in.
 *
 * NOTE(review): presumably used as the lookup key for cached programs;
 * confirm against the callers of make_state_key().
 */
struct state_key {
   unsigned light_color_material_mask:12; /* ctx->Light._ColorMaterialBitmask, only when lighting and color material are on */
   unsigned light_global_enabled:1;       /* ctx->Light.Enabled */
   unsigned light_local_viewer:1;         /* ctx->Light.Model.LocalViewer (lighting on) */
   unsigned light_twoside:1;              /* ctx->Light.Model.TwoSide (lighting on) */
   unsigned material_shininess_is_zero:1; /* no active shininess found by check_active_shininess() */
   unsigned need_eye_coords:1;            /* ctx->_NeedEyeCoords */
   unsigned normalize:1;                  /* ctx->Transform.Normalize */
   unsigned rescale_normals:1;            /* ctx->Transform.RescaleNormals */

   unsigned fog_source_is_depth:1;        /* fog coordinate source is GL_FRAGMENT_DEPTH_EXT */
   unsigned fog_distance_mode:2;          /* one of the FDM_* codes */
   unsigned separate_specular:1;          /* color control is GL_SEPARATE_SPECULAR_COLOR */
   unsigned point_attenuated:1;           /* ctx->Point._Attenuated */
   unsigned point_array:1;                /* VERT_ATTRIB_POINT_SIZE array is enabled in the VAO */
   unsigned texture_enabled_global:1;     /* any texgen, texture matrix or texture image unit is enabled */
   unsigned fragprog_inputs_read:12;      /* fp->info.inputs_read (plus COL0/TEX0 in feedback mode) */

   GLbitfield varying_vp_inputs;          /* copy of ctx->varying_vp_inputs */

   /* Per-index state.  The light_* bits are indexed by light number and
    * the tex* and coord_replace bits by texture coordinate unit, which is
    * why the array is sized by NUM_UNITS.
    */
   struct {
      unsigned light_enabled:1;
      unsigned light_eyepos3_is_zero:1;   /* light->EyePosition[3] == 0 */
      unsigned light_spotcutoff_is_180:1; /* light->SpotCutoff == 180 */
      unsigned light_attenuated:1;        /* attenuation factors differ from (1, 0, 0) */
      unsigned texunit_really_enabled:1;  /* texUnit->_Current is non-NULL */
      unsigned texmat_enabled:1;
      unsigned coord_replace:1;           /* point sprite coord replace enabled for this unit */
      unsigned texgen_enabled:4;
      unsigned texgen_mode0:4;            /* TXG_* code for the S coordinate */
      unsigned texgen_mode1:4;            /* TXG_* code for the T coordinate */
      unsigned texgen_mode2:4;            /* TXG_* code for the R coordinate */
      unsigned texgen_mode3:4;            /* TXG_* code for the Q coordinate */
   } unit[NUM_UNITS];
};
     88 
     89 
     90 #define TXG_NONE           0
     91 #define TXG_OBJ_LINEAR     1
     92 #define TXG_EYE_LINEAR     2
     93 #define TXG_SPHERE_MAP     3
     94 #define TXG_REFLECTION_MAP 4
     95 #define TXG_NORMAL_MAP     5
     96 
     97 static GLuint translate_texgen( GLboolean enabled, GLenum mode )
     98 {
     99    if (!enabled)
    100       return TXG_NONE;
    101 
    102    switch (mode) {
    103    case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
    104    case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
    105    case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
    106    case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
    107    case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
    108    default: return TXG_NONE;
    109    }
    110 }
    111 
    112 #define FDM_EYE_RADIAL    0
    113 #define FDM_EYE_PLANE     1
    114 #define FDM_EYE_PLANE_ABS 2
    115 
    116 static GLuint translate_fog_distance_mode( GLenum mode )
    117 {
    118    switch (mode) {
    119    case GL_EYE_RADIAL_NV:
    120       return FDM_EYE_RADIAL;
    121    case GL_EYE_PLANE:
    122       return FDM_EYE_PLANE;
    123    default: /* shouldn't happen; fall through to a sensible default */
    124    case GL_EYE_PLANE_ABSOLUTE_NV:
    125       return FDM_EYE_PLANE_ABS;
    126    }
    127 }
    128 
    129 static GLboolean check_active_shininess( struct gl_context *ctx,
    130                                          const struct state_key *key,
    131                                          GLuint side )
    132 {
    133    GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side;
    134 
    135    if ((key->varying_vp_inputs & VERT_BIT_COLOR0) &&
    136        (key->light_color_material_mask & (1 << attr)))
    137       return GL_TRUE;
    138 
    139    if (key->varying_vp_inputs & VERT_BIT_GENERIC(attr))
    140       return GL_TRUE;
    141 
    142    if (ctx->Light.Material.Attrib[attr][0] != 0.0F)
    143       return GL_TRUE;
    144 
    145    return GL_FALSE;
    146 }
    147 
    148 
/**
 * Fill \p key with the summary of the context state listed in
 * struct state_key.
 *
 * The key is zeroed first, so every field that is not explicitly set
 * below stays 0.  Requires ctx->FragmentProgram._Current to be non-NULL.
 *
 * \param ctx  GL context to sample.
 * \param key  destination key; completely overwritten.
 */
static void make_state_key( struct gl_context *ctx, struct state_key *key )
{
   const struct gl_program *fp = ctx->FragmentProgram._Current;
   GLbitfield mask;

   memset(key, 0, sizeof(struct state_key));

   /* This now relies on texenvprogram.c being active:
    */
   assert(fp);

   key->need_eye_coords = ctx->_NeedEyeCoords;

   key->fragprog_inputs_read = fp->info.inputs_read;
   key->varying_vp_inputs = ctx->varying_vp_inputs;

   if (ctx->RenderMode == GL_FEEDBACK) {
      /* make sure the vertprog emits color and tex0 */
      key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0);
   }

   key->separate_specular = (ctx->Light.Model.ColorControl ==
			     GL_SEPARATE_SPECULAR_COLOR);

   /* All lighting-related bits are only recorded while lighting is on. */
   if (ctx->Light.Enabled) {
      key->light_global_enabled = 1;

      if (ctx->Light.Model.LocalViewer)
	 key->light_local_viewer = 1;

      if (ctx->Light.Model.TwoSide)
	 key->light_twoside = 1;

      if (ctx->Light.ColorMaterialEnabled) {
	 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask;
      }

      /* Visit each enabled light; u_bit_scan() returns a set bit's index
       * and clears it from the mask.
       */
      mask = ctx->Light._EnabledLights;
      while (mask) {
         const int i = u_bit_scan(&mask);
         struct gl_light *light = &ctx->Light.Light[i];

         key->unit[i].light_enabled = 1;

         if (light->EyePosition[3] == 0.0F)
            key->unit[i].light_eyepos3_is_zero = 1;

         if (light->SpotCutoff == 180.0F)
            key->unit[i].light_spotcutoff_is_180 = 1;

         if (light->ConstantAttenuation != 1.0F ||
             light->LinearAttenuation != 0.0F ||
             light->QuadraticAttenuation != 0.0F)
            key->unit[i].light_attenuated = 1;
      }

      /* Shininess counts as zero only if neither the front side nor
       * (with two-sided lighting) the back side has active shininess.
       */
      if (check_active_shininess(ctx, key, 0)) {
         key->material_shininess_is_zero = 0;
      }
      else if (key->light_twoside &&
               check_active_shininess(ctx, key, 1)) {
         key->material_shininess_is_zero = 0;
      }
      else {
         key->material_shininess_is_zero = 1;
      }
   }

   if (ctx->Transform.Normalize)
      key->normalize = 1;

   if (ctx->Transform.RescaleNormals)
      key->rescale_normals = 1;

   /* The distance mode is only recorded when fog uses fragment depth. */
   if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) {
      key->fog_source_is_depth = 1;
      key->fog_distance_mode = translate_fog_distance_mode(ctx->Fog.FogDistanceMode);
   }

   if (ctx->Point._Attenuated)
      key->point_attenuated = 1;

   if (ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled)
      key->point_array = 1;

   if (ctx->Texture._TexGenEnabled ||
       ctx->Texture._TexMatEnabled ||
       ctx->Texture._MaxEnabledTexImageUnit != -1)
      key->texture_enabled_global = 1;

   /* Visit every unit that has an enabled coord set, texgen, texture
    * matrix or point-sprite coord replacement.
    */
   mask = ctx->Texture._EnabledCoordUnits | ctx->Texture._TexGenEnabled
      | ctx->Texture._TexMatEnabled | ctx->Point.CoordReplace;
   while (mask) {
      const int i = u_bit_scan(&mask);
      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];

      if (texUnit->_Current)
	 key->unit[i].texunit_really_enabled = 1;

      /* coord replace only matters while point sprites are enabled */
      if (ctx->Point.PointSprite)
	 if (ctx->Point.CoordReplace & (1u << i))
	    key->unit[i].coord_replace = 1;

      if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
	 key->unit[i].texmat_enabled = 1;

      if (texUnit->TexGenEnabled) {
	 key->unit[i].texgen_enabled = 1;

	 /* Bits 0..3 of TexGenEnabled gate the S, T, R and Q modes. */
	 key->unit[i].texgen_mode0 =
	    translate_texgen( texUnit->TexGenEnabled & (1<<0),
			      texUnit->GenS.Mode );
	 key->unit[i].texgen_mode1 =
	    translate_texgen( texUnit->TexGenEnabled & (1<<1),
			      texUnit->GenT.Mode );
	 key->unit[i].texgen_mode2 =
	    translate_texgen( texUnit->TexGenEnabled & (1<<2),
			      texUnit->GenR.Mode );
	 key->unit[i].texgen_mode3 =
	    translate_texgen( texUnit->TexGenEnabled & (1<<3),
			      texUnit->GenQ.Mode );
      }
   }
}
    273 
    274 
    275 
/* Very useful debugging tool - produces annotated listing of
 * generated program with line/function references for each
 * instruction back into this file.
 *
 * When non-zero, debug_insn() prints every emitted instruction together
 * with the function name and line number of the emit_op*() call.
 */
#define DISASSEM 0
    281 
    282 
/* Use uregs to represent registers internally, translate to Mesa's
 * expected formats on emit.
 *
 * NOTE: These are passed by value extensively in this file rather
 * than as usual by pointer reference.  If this disturbs you, try
 * remembering they are just 32bits in size.
 *
 * GCC is smart enough to deal with these dword-sized structures in
 * much the same way as if I had defined them as dwords and was using
 * macros to access and set the fields.  This is much nicer and easier
 * to evolve.
 *
 * The bitfield widths below add up to 32 (4 + 9 + 1 + 12 + 6).
 */
struct ureg {
   GLuint file:4;    /* register file, a PROGRAM_* value */
   GLint idx:9;      /* relative addressing may be negative */
                     /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
   GLuint negate:1;  /* emit_arg() turns this into NEGATE_XYZW / NEGATE_NONE */
   GLuint swz:12;    /* four packed swizzle selectors, see swizzle() */
   GLuint pad:6;     /* unused; make_ureg() sets it to 0 */
};
    303 
    304 
/**
 * Working state carried through the program generation helpers in this
 * file.
 */
struct tnl_program {
   const struct state_key *state;   /* fixed-function state being translated */
   struct gl_program *program;      /* program receiving instructions and parameters */
   GLuint max_inst;  /** number of instructions allocated for program */
   GLboolean mvp_with_dp4;          /* use DP4 matrix transforms instead of MUL/MAD on the transposed matrix */

   GLuint temp_in_use;              /* bitmask of temporaries currently allocated (see get_temp) */
   GLuint temp_reserved;            /* bitmask of temporaries that release_temp(s) must keep allocated */

   /* Lazily computed values: each stays undefined (see is_undef) until
    * the corresponding get_*() helper first emits code for it.
    */
   struct ureg eye_position;
   struct ureg eye_position_z;
   struct ureg eye_position_normalized;
   struct ureg transformed_normal;
   struct ureg identity;

   GLuint materials;                /* material attributes that vary per vertex (see set_material_flags) */
   GLuint color_materials;          /* subset of materials taken from the COLOR0 input */
};
    323 
    324 
    325 static const struct ureg undef = {
    326    PROGRAM_UNDEFINED,
    327    0,
    328    0,
    329    0,
    330    0
    331 };
    332 
/* Local shorthand for the swizzle component selectors, used as the
 * component arguments of swizzle() and swizzle1():
 */
#define X    SWIZZLE_X
#define Y    SWIZZLE_Y
#define Z    SWIZZLE_Z
#define W    SWIZZLE_W
    339 
    340 
    341 /* Construct a ureg:
    342  */
    343 static struct ureg make_ureg(GLuint file, GLint idx)
    344 {
    345    struct ureg reg;
    346    reg.file = file;
    347    reg.idx = idx;
    348    reg.negate = 0;
    349    reg.swz = SWIZZLE_NOOP;
    350    reg.pad = 0;
    351    return reg;
    352 }
    353 
    354 
    355 static struct ureg negate( struct ureg reg )
    356 {
    357    reg.negate ^= 1;
    358    return reg;
    359 }
    360 
    361 
    362 static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
    363 {
    364    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
    365 			   GET_SWZ(reg.swz, y),
    366 			   GET_SWZ(reg.swz, z),
    367 			   GET_SWZ(reg.swz, w));
    368    return reg;
    369 }
    370 
    371 
    372 static struct ureg swizzle1( struct ureg reg, int x )
    373 {
    374    return swizzle(reg, x, x, x, x);
    375 }
    376 
    377 
    378 static struct ureg get_temp( struct tnl_program *p )
    379 {
    380    int bit = ffs( ~p->temp_in_use );
    381    if (!bit) {
    382       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
    383       exit(1);
    384    }
    385 
    386    if ((GLuint) bit > p->program->arb.NumTemporaries)
    387       p->program->arb.NumTemporaries = bit;
    388 
    389    p->temp_in_use |= 1<<(bit-1);
    390    return make_ureg(PROGRAM_TEMPORARY, bit-1);
    391 }
    392 
    393 
    394 static struct ureg reserve_temp( struct tnl_program *p )
    395 {
    396    struct ureg temp = get_temp( p );
    397    p->temp_reserved |= 1<<temp.idx;
    398    return temp;
    399 }
    400 
    401 
    402 static void release_temp( struct tnl_program *p, struct ureg reg )
    403 {
    404    if (reg.file == PROGRAM_TEMPORARY) {
    405       p->temp_in_use &= ~(1<<reg.idx);
    406       p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
    407    }
    408 }
    409 
    410 static void release_temps( struct tnl_program *p )
    411 {
    412    p->temp_in_use = p->temp_reserved;
    413 }
    414 
    415 
    416 static struct ureg register_param5(struct tnl_program *p,
    417 				   GLint s0,
    418 				   GLint s1,
    419 				   GLint s2,
    420 				   GLint s3,
    421                                    GLint s4)
    422 {
    423    gl_state_index tokens[STATE_LENGTH];
    424    GLint idx;
    425    tokens[0] = s0;
    426    tokens[1] = s1;
    427    tokens[2] = s2;
    428    tokens[3] = s3;
    429    tokens[4] = s4;
    430    idx = _mesa_add_state_reference(p->program->Parameters, tokens );
    431    return make_ureg(PROGRAM_STATE_VAR, idx);
    432 }
    433 
    434 
    435 #define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
    436 #define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
    437 #define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
    438 #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
    439 
    440 
    441 
    442 /**
    443  * \param input  one of VERT_ATTRIB_x tokens.
    444  */
    445 static struct ureg register_input( struct tnl_program *p, GLuint input )
    446 {
    447    assert(input < VERT_ATTRIB_MAX);
    448 
    449    if (p->state->varying_vp_inputs & VERT_BIT(input)) {
    450       p->program->info.inputs_read |= VERT_BIT(input);
    451       return make_ureg(PROGRAM_INPUT, input);
    452    }
    453    else {
    454       return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
    455    }
    456 }
    457 
    458 
    459 /**
    460  * \param input  one of VARYING_SLOT_x tokens.
    461  */
    462 static struct ureg register_output( struct tnl_program *p, GLuint output )
    463 {
    464    p->program->info.outputs_written |= BITFIELD64_BIT(output);
    465    return make_ureg(PROGRAM_OUTPUT, output);
    466 }
    467 
    468 
    469 static struct ureg register_const4f( struct tnl_program *p,
    470 			      GLfloat s0,
    471 			      GLfloat s1,
    472 			      GLfloat s2,
    473 			      GLfloat s3)
    474 {
    475    gl_constant_value values[4];
    476    GLint idx;
    477    GLuint swizzle;
    478    values[0].f = s0;
    479    values[1].f = s1;
    480    values[2].f = s2;
    481    values[3].f = s3;
    482    idx = _mesa_add_unnamed_constant(p->program->Parameters, values, 4,
    483                                     &swizzle );
    484    assert(swizzle == SWIZZLE_NOOP);
    485    return make_ureg(PROGRAM_CONSTANT, idx);
    486 }
    487 
    488 #define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
    489 #define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
    490 #define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
    491 #define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
    492 
    493 static GLboolean is_undef( struct ureg reg )
    494 {
    495    return reg.file == PROGRAM_UNDEFINED;
    496 }
    497 
    498 
    499 static struct ureg get_identity_param( struct tnl_program *p )
    500 {
    501    if (is_undef(p->identity))
    502       p->identity = register_const4f(p, 0,0,0,1);
    503 
    504    return p->identity;
    505 }
    506 
    507 static void register_matrix_param5( struct tnl_program *p,
    508 				    GLint s0, /* modelview, projection, etc */
    509 				    GLint s1, /* texture matrix number */
    510 				    GLint s2, /* first row */
    511 				    GLint s3, /* last row */
    512 				    GLint s4, /* inverse, transpose, etc */
    513 				    struct ureg *matrix )
    514 {
    515    GLint i;
    516 
    517    /* This is a bit sad as the support is there to pull the whole
    518     * matrix out in one go:
    519     */
    520    for (i = 0; i <= s3 - s2; i++)
    521       matrix[i] = register_param5( p, s0, s1, i, i, s4 );
    522 }
    523 
    524 
    525 static void emit_arg( struct prog_src_register *src,
    526 		      struct ureg reg )
    527 {
    528    src->File = reg.file;
    529    src->Index = reg.idx;
    530    src->Swizzle = reg.swz;
    531    src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
    532    src->RelAddr = 0;
    533    /* Check that bitfield sizes aren't exceeded */
    534    assert(src->Index == reg.idx);
    535 }
    536 
    537 
    538 static void emit_dst( struct prog_dst_register *dst,
    539 		      struct ureg reg, GLuint mask )
    540 {
    541    dst->File = reg.file;
    542    dst->Index = reg.idx;
    543    /* allow zero as a shorthand for xyzw */
    544    dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
    545    /* Check that bitfield sizes aren't exceeded */
    546    assert(dst->Index == reg.idx);
    547 }
    548 
    549 
    550 static void debug_insn( struct prog_instruction *inst, const char *fn,
    551 			GLuint line )
    552 {
    553    if (DISASSEM) {
    554       static const char *last_fn;
    555 
    556       if (fn != last_fn) {
    557 	 last_fn = fn;
    558 	 printf("%s:\n", fn);
    559       }
    560 
    561       printf("%d:\t", line);
    562       _mesa_print_instruction(inst);
    563    }
    564 }
    565 
    566 
    567 static void emit_op3fn(struct tnl_program *p,
    568                        enum prog_opcode op,
    569 		       struct ureg dest,
    570 		       GLuint mask,
    571 		       struct ureg src0,
    572 		       struct ureg src1,
    573 		       struct ureg src2,
    574 		       const char *fn,
    575 		       GLuint line)
    576 {
    577    GLuint nr;
    578    struct prog_instruction *inst;
    579 
    580    assert(p->program->arb.NumInstructions <= p->max_inst);
    581 
    582    if (p->program->arb.NumInstructions == p->max_inst) {
    583       /* need to extend the program's instruction array */
    584       struct prog_instruction *newInst;
    585 
    586       /* double the size */
    587       p->max_inst *= 2;
    588 
    589       newInst =
    590          rzalloc_array(p->program, struct prog_instruction, p->max_inst);
    591       if (!newInst) {
    592          _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
    593          return;
    594       }
    595 
    596       _mesa_copy_instructions(newInst, p->program->arb.Instructions,
    597                               p->program->arb.NumInstructions);
    598 
    599       ralloc_free(p->program->arb.Instructions);
    600 
    601       p->program->arb.Instructions = newInst;
    602    }
    603 
    604    nr = p->program->arb.NumInstructions++;
    605 
    606    inst = &p->program->arb.Instructions[nr];
    607    inst->Opcode = (enum prog_opcode) op;
    608 
    609    emit_arg( &inst->SrcReg[0], src0 );
    610    emit_arg( &inst->SrcReg[1], src1 );
    611    emit_arg( &inst->SrcReg[2], src2 );
    612 
    613    emit_dst( &inst->DstReg, dest, mask );
    614 
    615    debug_insn(inst, fn, line);
    616 }
    617 
    618 
    619 #define emit_op3(p, op, dst, mask, src0, src1, src2) \
    620    emit_op3fn(p, op, dst, mask, src0, src1, src2, __func__, __LINE__)
    621 
    622 #define emit_op2(p, op, dst, mask, src0, src1) \
    623     emit_op3fn(p, op, dst, mask, src0, src1, undef, __func__, __LINE__)
    624 
    625 #define emit_op1(p, op, dst, mask, src0) \
    626     emit_op3fn(p, op, dst, mask, src0, undef, undef, __func__, __LINE__)
    627 
    628 
    629 static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
    630 {
    631    if (reg.file == PROGRAM_TEMPORARY &&
    632        !(p->temp_reserved & (1<<reg.idx)))
    633       return reg;
    634    else {
    635       struct ureg temp = get_temp(p);
    636       emit_op1(p, OPCODE_MOV, temp, 0, reg);
    637       return temp;
    638    }
    639 }
    640 
    641 
    642 /* Currently no tracking performed of input/output/register size or
    643  * active elements.  Could be used to reduce these operations, as
    644  * could the matrix type.
    645  */
    646 static void emit_matrix_transform_vec4( struct tnl_program *p,
    647 					struct ureg dest,
    648 					const struct ureg *mat,
    649 					struct ureg src)
    650 {
    651    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
    652    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
    653    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
    654    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
    655 }
    656 
    657 
    658 /* This version is much easier to implement if writemasks are not
    659  * supported natively on the target or (like SSE), the target doesn't
    660  * have a clean/obvious dotproduct implementation.
    661  */
    662 static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
    663 						  struct ureg dest,
    664 						  const struct ureg *mat,
    665 						  struct ureg src)
    666 {
    667    struct ureg tmp;
    668 
    669    if (dest.file != PROGRAM_TEMPORARY)
    670       tmp = get_temp(p);
    671    else
    672       tmp = dest;
    673 
    674    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
    675    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
    676    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
    677    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
    678 
    679    if (dest.file != PROGRAM_TEMPORARY)
    680       release_temp(p, tmp);
    681 }
    682 
    683 
    684 static void emit_matrix_transform_vec3( struct tnl_program *p,
    685 					struct ureg dest,
    686 					const struct ureg *mat,
    687 					struct ureg src)
    688 {
    689    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
    690    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
    691    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
    692 }
    693 
    694 
    695 static void emit_normalize_vec3( struct tnl_program *p,
    696 				 struct ureg dest,
    697 				 struct ureg src )
    698 {
    699    struct ureg tmp = get_temp(p);
    700    emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
    701    emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
    702    emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
    703    release_temp(p, tmp);
    704 }
    705 
    706 
    707 static void emit_passthrough( struct tnl_program *p,
    708 			      GLuint input,
    709 			      GLuint output )
    710 {
    711    struct ureg out = register_output(p, output);
    712    emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
    713 }
    714 
    715 
    716 static struct ureg get_eye_position( struct tnl_program *p )
    717 {
    718    if (is_undef(p->eye_position)) {
    719       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
    720       struct ureg modelview[4];
    721 
    722       p->eye_position = reserve_temp(p);
    723 
    724       if (p->mvp_with_dp4) {
    725 	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
    726                                  0, modelview );
    727 
    728 	 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
    729       }
    730       else {
    731 	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
    732 				 STATE_MATRIX_TRANSPOSE, modelview );
    733 
    734 	 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
    735       }
    736    }
    737 
    738    return p->eye_position;
    739 }
    740 
    741 
    742 static struct ureg get_eye_position_z( struct tnl_program *p )
    743 {
    744    if (!is_undef(p->eye_position))
    745       return swizzle1(p->eye_position, Z);
    746 
    747    if (is_undef(p->eye_position_z)) {
    748       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
    749       struct ureg modelview[4];
    750 
    751       p->eye_position_z = reserve_temp(p);
    752 
    753       register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
    754                               0, modelview );
    755 
    756       emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
    757    }
    758 
    759    return p->eye_position_z;
    760 }
    761 
    762 
    763 static struct ureg get_eye_position_normalized( struct tnl_program *p )
    764 {
    765    if (is_undef(p->eye_position_normalized)) {
    766       struct ureg eye = get_eye_position(p);
    767       p->eye_position_normalized = reserve_temp(p);
    768       emit_normalize_vec3(p, p->eye_position_normalized, eye);
    769    }
    770 
    771    return p->eye_position_normalized;
    772 }
    773 
    774 
    775 static struct ureg get_transformed_normal( struct tnl_program *p )
    776 {
    777    if (is_undef(p->transformed_normal) &&
    778        !p->state->need_eye_coords &&
    779        !p->state->normalize &&
    780        !(p->state->need_eye_coords == p->state->rescale_normals))
    781    {
    782       p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
    783    }
    784    else if (is_undef(p->transformed_normal))
    785    {
    786       struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
    787       struct ureg mvinv[3];
    788       struct ureg transformed_normal = reserve_temp(p);
    789 
    790       if (p->state->need_eye_coords) {
    791          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
    792                                  STATE_MATRIX_INVTRANS, mvinv );
    793 
    794          /* Transform to eye space:
    795           */
    796          emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
    797          normal = transformed_normal;
    798       }
    799 
    800       /* Normalize/Rescale:
    801        */
    802       if (p->state->normalize) {
    803 	 emit_normalize_vec3( p, transformed_normal, normal );
    804          normal = transformed_normal;
    805       }
    806       else if (p->state->need_eye_coords == p->state->rescale_normals) {
    807          /* This is already adjusted for eye/non-eye rendering:
    808           */
    809 	 struct ureg rescale = register_param2(p, STATE_INTERNAL,
    810                                                STATE_NORMAL_SCALE);
    811 
    812 	 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
    813          normal = transformed_normal;
    814       }
    815 
    816       assert(normal.file == PROGRAM_TEMPORARY);
    817       p->transformed_normal = normal;
    818    }
    819 
    820    return p->transformed_normal;
    821 }
    822 
    823 
    824 static void build_hpos( struct tnl_program *p )
    825 {
    826    struct ureg pos = register_input( p, VERT_ATTRIB_POS );
    827    struct ureg hpos = register_output( p, VARYING_SLOT_POS );
    828    struct ureg mvp[4];
    829 
    830    if (p->mvp_with_dp4) {
    831       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
    832 			      0, mvp );
    833       emit_matrix_transform_vec4( p, hpos, mvp, pos );
    834    }
    835    else {
    836       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
    837 			      STATE_MATRIX_TRANSPOSE, mvp );
    838       emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
    839    }
    840 }
    841 
    842 
    843 static GLuint material_attrib( GLuint side, GLuint property )
    844 {
    845    return (property - STATE_AMBIENT) * 2 + side;
    846 }
    847 
    848 
    849 /**
    850  * Get a bitmask of which material values vary on a per-vertex basis.
    851  */
    852 static void set_material_flags( struct tnl_program *p )
    853 {
    854    p->color_materials = 0;
    855    p->materials = 0;
    856 
    857    if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) {
    858       p->materials =
    859 	 p->color_materials = p->state->light_color_material_mask;
    860    }
    861 
    862    p->materials |= (p->state->varying_vp_inputs >> VERT_ATTRIB_GENERIC0);
    863 }
    864 
    865 
    866 static struct ureg get_material( struct tnl_program *p, GLuint side,
    867 				 GLuint property )
    868 {
    869    GLuint attrib = material_attrib(side, property);
    870 
    871    if (p->color_materials & (1<<attrib))
    872       return register_input(p, VERT_ATTRIB_COLOR0);
    873    else if (p->materials & (1<<attrib)) {
    874       /* Put material values in the GENERIC slots -- they are not used
    875        * for anything in fixed function mode.
    876        */
    877       return register_input( p, attrib + VERT_ATTRIB_GENERIC0 );
    878    }
    879    else
    880       return register_param3( p, STATE_MATERIAL, side, property );
    881 }
    882 
    883 #define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
    884 				   MAT_BIT_FRONT_AMBIENT | \
    885 				   MAT_BIT_FRONT_DIFFUSE) << (side))
    886 
    887 
    888 /**
    889  * Either return a precalculated constant value or emit code to
    890  * calculate these values dynamically in the case where material calls
    891  * are present between begin/end pairs.
    892  *
    893  * Probably want to shift this to the program compilation phase - if
    894  * we always emitted the calculation here, a smart compiler could
    895  * detect that it was constant (given a certain set of inputs), and
    896  * lift it out of the main loop.  That way the programs created here
    897  * would be independent of the vertex_buffer details.
    898  */
    899 static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
    900 {
    901    if (p->materials & SCENE_COLOR_BITS(side)) {
    902       struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
    903       struct ureg material_emission = get_material(p, side, STATE_EMISSION);
    904       struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
    905       struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
    906       struct ureg tmp = make_temp(p, material_diffuse);
    907       emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
    908 	       material_ambient, material_emission);
    909       return tmp;
    910    }
    911    else
    912       return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
    913 }
    914 
    915 
    916 static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
    917 				  GLuint side, GLuint property )
    918 {
    919    GLuint attrib = material_attrib(side, property);
    920    if (p->materials & (1<<attrib)) {
    921       struct ureg light_value =
    922 	 register_param3(p, STATE_LIGHT, light, property);
    923       struct ureg material_value = get_material(p, side, property);
    924       struct ureg tmp = get_temp(p);
    925       emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
    926       return tmp;
    927    }
    928    else
    929       return register_param4(p, STATE_LIGHTPROD, light, side, property);
    930 }
    931 
    932 
    933 static struct ureg calculate_light_attenuation( struct tnl_program *p,
    934 						GLuint i,
    935 						struct ureg VPpli,
    936 						struct ureg dist )
    937 {
    938    struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
    939 					     STATE_ATTENUATION);
    940    struct ureg att = undef;
    941 
    942    /* Calculate spot attenuation:
    943     */
    944    if (!p->state->unit[i].light_spotcutoff_is_180) {
    945       struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
    946 						  STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
    947       struct ureg spot = get_temp(p);
    948       struct ureg slt = get_temp(p);
    949 
    950       att = get_temp(p);
    951 
    952       emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
    953       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
    954       emit_op1(p, OPCODE_ABS, spot, 0, spot);
    955       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
    956       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
    957 
    958       release_temp(p, spot);
    959       release_temp(p, slt);
    960    }
    961 
    962    /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62):
    963     *
    964     * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero)
    965     */
    966    if (p->state->unit[i].light_attenuated && !is_undef(dist)) {
    967       if (is_undef(att))
    968          att = get_temp(p);
    969       /* 1/d,d,d,1/d */
    970       emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
    971       /* 1,d,d*d,1/d */
    972       emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
    973       /* 1/dist-atten */
    974       emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
    975 
    976       if (!p->state->unit[i].light_spotcutoff_is_180) {
    977 	 /* dist-atten */
    978 	 emit_op1(p, OPCODE_RCP, dist, 0, dist);
    979 	 /* spot-atten * dist-atten */
    980 	 emit_op2(p, OPCODE_MUL, att, 0, dist, att);
    981       }
    982       else {
    983 	 /* dist-atten */
    984 	 emit_op1(p, OPCODE_RCP, att, 0, dist);
    985       }
    986    }
    987 
    988    return att;
    989 }
    990 
    991 
    992 /**
    993  * Compute:
    994  *   lit.y = MAX(0, dots.x)
    995  *   lit.z = SLT(0, dots.x)
    996  */
    997 static void emit_degenerate_lit( struct tnl_program *p,
    998                                  struct ureg lit,
    999                                  struct ureg dots )
   1000 {
   1001    struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
   1002 
   1003    /* Note that lit.x & lit.w will not be examined.  Note also that
   1004     * dots.xyzw == dots.xxxx.
   1005     */
   1006 
   1007    /* MAX lit, id, dots;
   1008     */
   1009    emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
   1010 
   1011    /* result[2] = (in > 0 ? 1 : 0)
   1012     * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
   1013     */
   1014    emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
   1015 }
   1016 
   1017 
   1018 /* Need to add some addtional parameters to allow lighting in object
   1019  * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
   1020  * space lighting.
   1021  */
   1022 static void build_lighting( struct tnl_program *p )
   1023 {
   1024    const GLboolean twoside = p->state->light_twoside;
   1025    const GLboolean separate = p->state->separate_specular;
   1026    GLuint nr_lights = 0, count = 0;
   1027    struct ureg normal = get_transformed_normal(p);
   1028    struct ureg lit = get_temp(p);
   1029    struct ureg dots = get_temp(p);
   1030    struct ureg _col0 = undef, _col1 = undef;
   1031    struct ureg _bfc0 = undef, _bfc1 = undef;
   1032    GLuint i;
   1033 
   1034    /*
   1035     * NOTE:
   1036     * dots.x = dot(normal, VPpli)
   1037     * dots.y = dot(normal, halfAngle)
   1038     * dots.z = back.shininess
   1039     * dots.w = front.shininess
   1040     */
   1041 
   1042    for (i = 0; i < MAX_LIGHTS; i++)
   1043       if (p->state->unit[i].light_enabled)
   1044 	 nr_lights++;
   1045 
   1046    set_material_flags(p);
   1047 
   1048    {
   1049       if (!p->state->material_shininess_is_zero) {
   1050          struct ureg shininess = get_material(p, 0, STATE_SHININESS);
   1051          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
   1052          release_temp(p, shininess);
   1053       }
   1054 
   1055       _col0 = make_temp(p, get_scenecolor(p, 0));
   1056       if (separate)
   1057 	 _col1 = make_temp(p, get_identity_param(p));
   1058       else
   1059 	 _col1 = _col0;
   1060    }
   1061 
   1062    if (twoside) {
   1063       if (!p->state->material_shininess_is_zero) {
   1064          /* Note that we negate the back-face specular exponent here.
   1065           * The negation will be un-done later in the back-face code below.
   1066           */
   1067          struct ureg shininess = get_material(p, 1, STATE_SHININESS);
   1068          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
   1069                   negate(swizzle1(shininess,X)));
   1070          release_temp(p, shininess);
   1071       }
   1072 
   1073       _bfc0 = make_temp(p, get_scenecolor(p, 1));
   1074       if (separate)
   1075 	 _bfc1 = make_temp(p, get_identity_param(p));
   1076       else
   1077 	 _bfc1 = _bfc0;
   1078    }
   1079 
   1080    /* If no lights, still need to emit the scenecolor.
   1081     */
   1082    {
   1083       struct ureg res0 = register_output( p, VARYING_SLOT_COL0 );
   1084       emit_op1(p, OPCODE_MOV, res0, 0, _col0);
   1085    }
   1086 
   1087    if (separate) {
   1088       struct ureg res1 = register_output( p, VARYING_SLOT_COL1 );
   1089       emit_op1(p, OPCODE_MOV, res1, 0, _col1);
   1090    }
   1091 
   1092    if (twoside) {
   1093       struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 );
   1094       emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
   1095    }
   1096 
   1097    if (twoside && separate) {
   1098       struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 );
   1099       emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
   1100    }
   1101 
   1102    if (nr_lights == 0) {
   1103       release_temps(p);
   1104       return;
   1105    }
   1106 
   1107    for (i = 0; i < MAX_LIGHTS; i++) {
   1108       if (p->state->unit[i].light_enabled) {
   1109 	 struct ureg half = undef;
   1110 	 struct ureg att = undef, VPpli = undef;
   1111 	 struct ureg dist = undef;
   1112 
   1113 	 count++;
   1114          if (p->state->unit[i].light_eyepos3_is_zero) {
   1115              VPpli = register_param3(p, STATE_INTERNAL,
   1116                                      STATE_LIGHT_POSITION_NORMALIZED, i);
   1117          } else {
   1118             struct ureg Ppli = register_param3(p, STATE_INTERNAL,
   1119                                                STATE_LIGHT_POSITION, i);
   1120             struct ureg V = get_eye_position(p);
   1121 
   1122             VPpli = get_temp(p);
   1123             dist = get_temp(p);
   1124 
   1125             /* Calculate VPpli vector
   1126              */
   1127             emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
   1128 
   1129             /* Normalize VPpli.  The dist value also used in
   1130              * attenuation below.
   1131              */
   1132             emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
   1133             emit_op1(p, OPCODE_RSQ, dist, 0, dist);
   1134             emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
   1135          }
   1136 
   1137          /* Calculate attenuation:
   1138           */
   1139          att = calculate_light_attenuation(p, i, VPpli, dist);
   1140          release_temp(p, dist);
   1141 
   1142 	 /* Calculate viewer direction, or use infinite viewer:
   1143 	  */
   1144          if (!p->state->material_shininess_is_zero) {
   1145             if (p->state->light_local_viewer) {
   1146                struct ureg eye_hat = get_eye_position_normalized(p);
   1147                half = get_temp(p);
   1148                emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
   1149                emit_normalize_vec3(p, half, half);
   1150             } else if (p->state->unit[i].light_eyepos3_is_zero) {
   1151                half = register_param3(p, STATE_INTERNAL,
   1152                                       STATE_LIGHT_HALF_VECTOR, i);
   1153             } else {
   1154                struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
   1155                half = get_temp(p);
   1156                emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
   1157                emit_normalize_vec3(p, half, half);
   1158             }
   1159 	 }
   1160 
   1161 	 /* Calculate dot products:
   1162 	  */
   1163          if (p->state->material_shininess_is_zero) {
   1164             emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
   1165          }
   1166          else {
   1167             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
   1168             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
   1169          }
   1170 
   1171 	 /* Front face lighting:
   1172 	  */
   1173 	 {
   1174 	    struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
   1175 	    struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
   1176 	    struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
   1177 	    struct ureg res0, res1;
   1178 	    GLuint mask0, mask1;
   1179 
   1180 	    if (count == nr_lights) {
   1181 	       if (separate) {
   1182 		  mask0 = WRITEMASK_XYZ;
   1183 		  mask1 = WRITEMASK_XYZ;
   1184 		  res0 = register_output( p, VARYING_SLOT_COL0 );
   1185 		  res1 = register_output( p, VARYING_SLOT_COL1 );
   1186 	       }
   1187 	       else {
   1188 		  mask0 = 0;
   1189 		  mask1 = WRITEMASK_XYZ;
   1190 		  res0 = _col0;
   1191 		  res1 = register_output( p, VARYING_SLOT_COL0 );
   1192 	       }
   1193 	    }
   1194             else {
   1195 	       mask0 = 0;
   1196 	       mask1 = 0;
   1197 	       res0 = _col0;
   1198 	       res1 = _col1;
   1199 	    }
   1200 
   1201 	    if (!is_undef(att)) {
   1202                /* light is attenuated by distance */
   1203                emit_op1(p, OPCODE_LIT, lit, 0, dots);
   1204                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
   1205                emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
   1206             }
   1207             else if (!p->state->material_shininess_is_zero) {
   1208                /* there's a non-zero specular term */
   1209                emit_op1(p, OPCODE_LIT, lit, 0, dots);
   1210                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
   1211             }
   1212             else {
   1213                /* no attenutation, no specular */
   1214                emit_degenerate_lit(p, lit, dots);
   1215                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
   1216             }
   1217 
   1218 	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
   1219 	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
   1220 
   1221 	    release_temp(p, ambient);
   1222 	    release_temp(p, diffuse);
   1223 	    release_temp(p, specular);
   1224 	 }
   1225 
   1226 	 /* Back face lighting:
   1227 	  */
   1228 	 if (twoside) {
   1229 	    struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
   1230 	    struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
   1231 	    struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
   1232 	    struct ureg res0, res1;
   1233 	    GLuint mask0, mask1;
   1234 
   1235 	    if (count == nr_lights) {
   1236 	       if (separate) {
   1237 		  mask0 = WRITEMASK_XYZ;
   1238 		  mask1 = WRITEMASK_XYZ;
   1239 		  res0 = register_output( p, VARYING_SLOT_BFC0 );
   1240 		  res1 = register_output( p, VARYING_SLOT_BFC1 );
   1241 	       }
   1242 	       else {
   1243 		  mask0 = 0;
   1244 		  mask1 = WRITEMASK_XYZ;
   1245 		  res0 = _bfc0;
   1246 		  res1 = register_output( p, VARYING_SLOT_BFC0 );
   1247 	       }
   1248 	    }
   1249             else {
   1250 	       res0 = _bfc0;
   1251 	       res1 = _bfc1;
   1252 	       mask0 = 0;
   1253 	       mask1 = 0;
   1254 	    }
   1255 
   1256             /* For the back face we need to negate the X and Y component
   1257              * dot products.  dots.Z has the negated back-face specular
   1258              * exponent.  We swizzle that into the W position.  This
   1259              * negation makes the back-face specular term positive again.
   1260              */
   1261             dots = negate(swizzle(dots,X,Y,W,Z));
   1262 
   1263 	    if (!is_undef(att)) {
   1264                emit_op1(p, OPCODE_LIT, lit, 0, dots);
   1265 	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
   1266                emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
   1267             }
   1268             else if (!p->state->material_shininess_is_zero) {
   1269                emit_op1(p, OPCODE_LIT, lit, 0, dots);
   1270                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
   1271             }
   1272             else {
   1273                emit_degenerate_lit(p, lit, dots);
   1274                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
   1275             }
   1276 
   1277 	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
   1278 	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
   1279             /* restore dots to its original state for subsequent lights
   1280              * by negating and swizzling again.
   1281              */
   1282             dots = negate(swizzle(dots,X,Y,W,Z));
   1283 
   1284 	    release_temp(p, ambient);
   1285 	    release_temp(p, diffuse);
   1286 	    release_temp(p, specular);
   1287 	 }
   1288 
   1289 	 release_temp(p, half);
   1290 	 release_temp(p, VPpli);
   1291 	 release_temp(p, att);
   1292       }
   1293    }
   1294 
   1295    release_temps( p );
   1296 }
   1297 
   1298 
   1299 static void build_fog( struct tnl_program *p )
   1300 {
   1301    struct ureg fog = register_output(p, VARYING_SLOT_FOGC);
   1302    struct ureg input;
   1303 
   1304    if (p->state->fog_source_is_depth) {
   1305 
   1306       switch (p->state->fog_distance_mode) {
   1307       case FDM_EYE_RADIAL: /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */
   1308          input = get_eye_position(p);
   1309          emit_op2(p, OPCODE_DP3, fog, WRITEMASK_X, input, input);
   1310          emit_op1(p, OPCODE_RSQ, fog, WRITEMASK_X, fog);
   1311          emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, fog);
   1312          break;
   1313       case FDM_EYE_PLANE: /* Z = Ze */
   1314          input = get_eye_position_z(p);
   1315          emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input);
   1316          break;
   1317       case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */
   1318          input = get_eye_position_z(p);
   1319          emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
   1320          break;
   1321       default:
   1322          assert(!"Bad fog mode in build_fog()");
   1323          break;
   1324       }
   1325 
   1326    }
   1327    else {
   1328       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
   1329       emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
   1330    }
   1331 
   1332    emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p));
   1333 }
   1334 
   1335 
   1336 static void build_reflect_texgen( struct tnl_program *p,
   1337 				  struct ureg dest,
   1338 				  GLuint writemask )
   1339 {
   1340    struct ureg normal = get_transformed_normal(p);
   1341    struct ureg eye_hat = get_eye_position_normalized(p);
   1342    struct ureg tmp = get_temp(p);
   1343 
   1344    /* n.u */
   1345    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
   1346    /* 2n.u */
   1347    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
   1348    /* (-2n.u)n + u */
   1349    emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
   1350 
   1351    release_temp(p, tmp);
   1352 }
   1353 
   1354 
   1355 static void build_sphere_texgen( struct tnl_program *p,
   1356 				 struct ureg dest,
   1357 				 GLuint writemask )
   1358 {
   1359    struct ureg normal = get_transformed_normal(p);
   1360    struct ureg eye_hat = get_eye_position_normalized(p);
   1361    struct ureg tmp = get_temp(p);
   1362    struct ureg half = register_scalar_const(p, .5);
   1363    struct ureg r = get_temp(p);
   1364    struct ureg inv_m = get_temp(p);
   1365    struct ureg id = get_identity_param(p);
   1366 
   1367    /* Could share the above calculations, but it would be
   1368     * a fairly odd state for someone to set (both sphere and
   1369     * reflection active for different texture coordinate
   1370     * components.  Of course - if two texture units enable
   1371     * reflect and/or sphere, things start to tilt in favour
   1372     * of seperating this out:
   1373     */
   1374 
   1375    /* n.u */
   1376    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
   1377    /* 2n.u */
   1378    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
   1379    /* (-2n.u)n + u */
   1380    emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
   1381    /* r + 0,0,1 */
   1382    emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
   1383    /* rx^2 + ry^2 + (rz+1)^2 */
   1384    emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
   1385    /* 2/m */
   1386    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
   1387    /* 1/m */
   1388    emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
   1389    /* r/m + 1/2 */
   1390    emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
   1391 
   1392    release_temp(p, tmp);
   1393    release_temp(p, r);
   1394    release_temp(p, inv_m);
   1395 }
   1396 
   1397 
   1398 static void build_texture_transform( struct tnl_program *p )
   1399 {
   1400    GLuint i, j;
   1401 
   1402    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
   1403 
   1404       if (!(p->state->fragprog_inputs_read & VARYING_BIT_TEX(i)))
   1405 	 continue;
   1406 
   1407       if (p->state->unit[i].coord_replace)
   1408   	 continue;
   1409 
   1410       if (p->state->unit[i].texgen_enabled ||
   1411 	  p->state->unit[i].texmat_enabled) {
   1412 
   1413 	 GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
   1414 	 struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i);
   1415 	 struct ureg out_texgen = undef;
   1416 
   1417 	 if (p->state->unit[i].texgen_enabled) {
   1418 	    GLuint copy_mask = 0;
   1419 	    GLuint sphere_mask = 0;
   1420 	    GLuint reflect_mask = 0;
   1421 	    GLuint normal_mask = 0;
   1422 	    GLuint modes[4];
   1423 
   1424 	    if (texmat_enabled)
   1425 	       out_texgen = get_temp(p);
   1426 	    else
   1427 	       out_texgen = out;
   1428 
   1429 	    modes[0] = p->state->unit[i].texgen_mode0;
   1430 	    modes[1] = p->state->unit[i].texgen_mode1;
   1431 	    modes[2] = p->state->unit[i].texgen_mode2;
   1432 	    modes[3] = p->state->unit[i].texgen_mode3;
   1433 
   1434 	    for (j = 0; j < 4; j++) {
   1435 	       switch (modes[j]) {
   1436 	       case TXG_OBJ_LINEAR: {
   1437 		  struct ureg obj = register_input(p, VERT_ATTRIB_POS);
   1438 		  struct ureg plane =
   1439 		     register_param3(p, STATE_TEXGEN, i,
   1440 				     STATE_TEXGEN_OBJECT_S + j);
   1441 
   1442 		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
   1443 			   obj, plane );
   1444 		  break;
   1445 	       }
   1446 	       case TXG_EYE_LINEAR: {
   1447 		  struct ureg eye = get_eye_position(p);
   1448 		  struct ureg plane =
   1449 		     register_param3(p, STATE_TEXGEN, i,
   1450 				     STATE_TEXGEN_EYE_S + j);
   1451 
   1452 		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
   1453 			   eye, plane );
   1454 		  break;
   1455 	       }
   1456 	       case TXG_SPHERE_MAP:
   1457 		  sphere_mask |= WRITEMASK_X << j;
   1458 		  break;
   1459 	       case TXG_REFLECTION_MAP:
   1460 		  reflect_mask |= WRITEMASK_X << j;
   1461 		  break;
   1462 	       case TXG_NORMAL_MAP:
   1463 		  normal_mask |= WRITEMASK_X << j;
   1464 		  break;
   1465 	       case TXG_NONE:
   1466 		  copy_mask |= WRITEMASK_X << j;
   1467 	       }
   1468 	    }
   1469 
   1470 	    if (sphere_mask) {
   1471 	       build_sphere_texgen(p, out_texgen, sphere_mask);
   1472 	    }
   1473 
   1474 	    if (reflect_mask) {
   1475 	       build_reflect_texgen(p, out_texgen, reflect_mask);
   1476 	    }
   1477 
   1478 	    if (normal_mask) {
   1479 	       struct ureg normal = get_transformed_normal(p);
   1480 	       emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
   1481 	    }
   1482 
   1483 	    if (copy_mask) {
   1484 	       struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
   1485 	       emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
   1486 	    }
   1487 	 }
   1488 
   1489 	 if (texmat_enabled) {
   1490 	    struct ureg texmat[4];
   1491 	    struct ureg in = (!is_undef(out_texgen) ?
   1492 			      out_texgen :
   1493 			      register_input(p, VERT_ATTRIB_TEX0+i));
   1494 	    if (p->mvp_with_dp4) {
   1495 	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
   1496 				       0, texmat );
   1497 	       emit_matrix_transform_vec4( p, out, texmat, in );
   1498 	    }
   1499 	    else {
   1500 	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
   1501 				       STATE_MATRIX_TRANSPOSE, texmat );
   1502 	       emit_transpose_matrix_transform_vec4( p, out, texmat, in );
   1503 	    }
   1504 	 }
   1505 
   1506 	 release_temps(p);
   1507       }
   1508       else {
   1509 	 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i);
   1510       }
   1511    }
   1512 }
   1513 
   1514 
   1515 /**
   1516  * Point size attenuation computation.
   1517  */
   1518 static void build_atten_pointsize( struct tnl_program *p )
   1519 {
   1520    struct ureg eye = get_eye_position_z(p);
   1521    struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
   1522    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
   1523    struct ureg out = register_output(p, VARYING_SLOT_PSIZ);
   1524    struct ureg ut = get_temp(p);
   1525 
   1526    /* dist = |eyez| */
   1527    emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
   1528    /* p1 + dist * (p2 + dist * p3); */
   1529    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
   1530 		swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
   1531    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
   1532 		ut, swizzle1(state_attenuation, X));
   1533 
   1534    /* 1 / sqrt(factor) */
   1535    emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
   1536 
   1537 #if 0
   1538    /* out = pointSize / sqrt(factor) */
   1539    emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
   1540 #else
   1541    /* this is a good place to clamp the point size since there's likely
   1542     * no hardware registers to clamp point size at rasterization time.
   1543     */
   1544    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
   1545    emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
   1546    emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
   1547 #endif
   1548 
   1549    release_temp(p, ut);
   1550 }
   1551 
   1552 
   1553 /**
   1554  * Pass-though per-vertex point size, from user's point size array.
   1555  */
   1556 static void build_array_pointsize( struct tnl_program *p )
   1557 {
   1558    struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
   1559    struct ureg out = register_output(p, VARYING_SLOT_PSIZ);
   1560    emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
   1561 }
   1562 
   1563 
   1564 static void build_tnl_program( struct tnl_program *p )
   1565 {
   1566    /* Emit the program, starting with the modelview, projection transforms:
   1567     */
   1568    build_hpos(p);
   1569 
   1570    /* Lighting calculations:
   1571     */
   1572    if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) {
   1573       if (p->state->light_global_enabled)
   1574 	 build_lighting(p);
   1575       else {
   1576 	 if (p->state->fragprog_inputs_read & VARYING_BIT_COL0)
   1577 	    emit_passthrough(p, VERT_ATTRIB_COLOR0, VARYING_SLOT_COL0);
   1578 
   1579 	 if (p->state->fragprog_inputs_read & VARYING_BIT_COL1)
   1580 	    emit_passthrough(p, VERT_ATTRIB_COLOR1, VARYING_SLOT_COL1);
   1581       }
   1582    }
   1583 
   1584    if (p->state->fragprog_inputs_read & VARYING_BIT_FOGC)
   1585       build_fog(p);
   1586 
   1587    if (p->state->fragprog_inputs_read & VARYING_BITS_TEX_ANY)
   1588       build_texture_transform(p);
   1589 
   1590    if (p->state->point_attenuated)
   1591       build_atten_pointsize(p);
   1592    else if (p->state->point_array)
   1593       build_array_pointsize(p);
   1594 
   1595    /* Finish up:
   1596     */
   1597    emit_op1(p, OPCODE_END, undef, 0, undef);
   1598 
   1599    /* Disassemble:
   1600     */
   1601    if (DISASSEM) {
   1602       printf ("\n");
   1603    }
   1604 }
   1605 
   1606 
   1607 static void
   1608 create_new_program( const struct state_key *key,
   1609                     struct gl_program *program,
   1610                     GLboolean mvp_with_dp4,
   1611                     GLuint max_temps)
   1612 {
   1613    struct tnl_program p;
   1614 
   1615    memset(&p, 0, sizeof(p));
   1616    p.state = key;
   1617    p.program = program;
   1618    p.eye_position = undef;
   1619    p.eye_position_z = undef;
   1620    p.eye_position_normalized = undef;
   1621    p.transformed_normal = undef;
   1622    p.identity = undef;
   1623    p.temp_in_use = 0;
   1624    p.mvp_with_dp4 = mvp_with_dp4;
   1625 
   1626    if (max_temps >= sizeof(int) * 8)
   1627       p.temp_reserved = 0;
   1628    else
   1629       p.temp_reserved = ~((1<<max_temps)-1);
   1630 
   1631    /* Start by allocating 32 instructions.
   1632     * If we need more, we'll grow the instruction array as needed.
   1633     */
   1634    p.max_inst = 32;
   1635    p.program->arb.Instructions =
   1636       rzalloc_array(program, struct prog_instruction, p.max_inst);
   1637    p.program->String = NULL;
   1638    p.program->arb.NumInstructions =
   1639    p.program->arb.NumTemporaries =
   1640    p.program->arb.NumParameters =
   1641    p.program->arb.NumAttributes = p.program->arb.NumAddressRegs = 0;
   1642    p.program->Parameters = _mesa_new_parameter_list();
   1643    p.program->info.inputs_read = 0;
   1644    p.program->info.outputs_written = 0;
   1645 
   1646    build_tnl_program( &p );
   1647 }
   1648 
   1649 
   1650 /**
   1651  * Return a vertex program which implements the current fixed-function
   1652  * transform/lighting/texgen operations.
   1653  */
   1654 struct gl_program *
   1655 _mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
   1656 {
   1657    struct gl_program *prog;
   1658    struct state_key key;
   1659 
   1660    /* Grab all the relevant state and put it in a single structure:
   1661     */
   1662    make_state_key(ctx, &key);
   1663 
   1664    /* Look for an already-prepared program for this state:
   1665     */
   1666    prog = _mesa_search_program_cache(ctx->VertexProgram.Cache, &key,
   1667                                      sizeof(key));
   1668 
   1669    if (!prog) {
   1670       /* OK, we'll have to build a new one */
   1671       if (0)
   1672          printf("Build new TNL program\n");
   1673 
   1674       prog = ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0, true);
   1675       if (!prog)
   1676          return NULL;
   1677 
   1678       create_new_program( &key, prog,
   1679                           ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS,
   1680                           ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps );
   1681 
   1682       if (ctx->Driver.ProgramStringNotify)
   1683          ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, prog);
   1684 
   1685       _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, &key,
   1686                                  sizeof(key), prog);
   1687    }
   1688 
   1689    return prog;
   1690 }
   1691