Home | History | Annotate | Download | only in i915
      1 /**************************************************************************
      2  *
      3  * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 #include "main/glheader.h"
     29 #include "main/macros.h"
     30 #include "main/enums.h"
     31 
     32 #include "program/prog_instruction.h"
     33 #include "program/prog_parameter.h"
     34 #include "program/program.h"
     35 #include "program/programopt.h"
     36 #include "program/prog_print.h"
     37 
     38 #include "tnl/tnl.h"
     39 #include "tnl/t_context.h"
     40 
     41 #include "intel_batchbuffer.h"
     42 
     43 #include "i915_reg.h"
     44 #include "i915_context.h"
     45 #include "i915_program.h"
     46 
     47 static const GLfloat sin_quad_constants[2][4] = {
     48    {
     49       2.0,
     50       -1.0,
     51       .5,
     52       .75
     53    },
     54    {
     55       4.0,
     56       -4.0,
     57       1.0 / (2.0 * M_PI),
     58       .2225
     59    }
     60 };
     61 
     62 static const GLfloat sin_constants[4] = { 1.0,
     63    -1.0 / (3 * 2 * 1),
     64    1.0 / (5 * 4 * 3 * 2 * 1),
     65    -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
     66 };
     67 
     68 /* 1, -1/2!, 1/4!, -1/6! */
     69 static const GLfloat cos_constants[4] = { 1.0,
     70    -1.0 / (2 * 1),
     71    1.0 / (4 * 3 * 2 * 1),
     72    -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
     73 };
     74 
     75 /**
     76  * Retrieve a ureg for the given source register.  Will emit
     77  * constants, apply swizzling and negation as needed.
     78  */
     79 static GLuint
     80 src_vector(struct i915_fragment_program *p,
     81            const struct prog_src_register *source,
     82            const struct gl_fragment_program *program)
     83 {
     84    GLuint src;
     85 
     86    switch (source->File) {
     87 
     88       /* Registers:
     89        */
     90    case PROGRAM_TEMPORARY:
     91       if (source->Index >= I915_MAX_TEMPORARY) {
     92          i915_program_error(p, "Exceeded max temporary reg: %d/%d",
     93 			    source->Index, I915_MAX_TEMPORARY);
     94          return 0;
     95       }
     96       src = UREG(REG_TYPE_R, source->Index);
     97       break;
     98    case PROGRAM_INPUT:
     99       switch (source->Index) {
    100       case FRAG_ATTRIB_WPOS:
    101          src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
    102          break;
    103       case FRAG_ATTRIB_COL0:
    104          src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
    105          break;
    106       case FRAG_ATTRIB_COL1:
    107          src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
    108          src = swizzle(src, X, Y, Z, ONE);
    109          break;
    110       case FRAG_ATTRIB_FOGC:
    111          src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
    112          src = swizzle(src, W, ZERO, ZERO, ONE);
    113          break;
    114       case FRAG_ATTRIB_TEX0:
    115       case FRAG_ATTRIB_TEX1:
    116       case FRAG_ATTRIB_TEX2:
    117       case FRAG_ATTRIB_TEX3:
    118       case FRAG_ATTRIB_TEX4:
    119       case FRAG_ATTRIB_TEX5:
    120       case FRAG_ATTRIB_TEX6:
    121       case FRAG_ATTRIB_TEX7:
    122          src = i915_emit_decl(p, REG_TYPE_T,
    123                               T_TEX0 + (source->Index - FRAG_ATTRIB_TEX0),
    124                               D0_CHANNEL_ALL);
    125 	 break;
    126 
    127       case FRAG_ATTRIB_VAR0:
    128       case FRAG_ATTRIB_VAR0 + 1:
    129       case FRAG_ATTRIB_VAR0 + 2:
    130       case FRAG_ATTRIB_VAR0 + 3:
    131       case FRAG_ATTRIB_VAR0 + 4:
    132       case FRAG_ATTRIB_VAR0 + 5:
    133       case FRAG_ATTRIB_VAR0 + 6:
    134       case FRAG_ATTRIB_VAR0 + 7:
    135          src = i915_emit_decl(p, REG_TYPE_T,
    136                               T_TEX0 + (source->Index - FRAG_ATTRIB_VAR0),
    137                               D0_CHANNEL_ALL);
    138          break;
    139 
    140       default:
    141          i915_program_error(p, "Bad source->Index: %d", source->Index);
    142          return 0;
    143       }
    144       break;
    145 
    146    case PROGRAM_OUTPUT:
    147       switch (source->Index) {
    148       case FRAG_RESULT_COLOR:
    149 	 src = UREG(REG_TYPE_OC, 0);
    150 	 break;
    151       case FRAG_RESULT_DEPTH:
    152 	 src = UREG(REG_TYPE_OD, 0);
    153 	 break;
    154       default:
    155 	 i915_program_error(p, "Bad source->Index: %d", source->Index);
    156 	 return 0;
    157       }
    158       break;
    159 
    160       /* Various paramters and env values.  All emitted to
    161        * hardware as program constants.
    162        */
    163    case PROGRAM_LOCAL_PARAM:
    164       src = i915_emit_param4fv(p, program->Base.LocalParams[source->Index]);
    165       break;
    166 
    167    case PROGRAM_ENV_PARAM:
    168       src =
    169          i915_emit_param4fv(p,
    170                             p->ctx->FragmentProgram.Parameters[source->
    171                                                                Index]);
    172       break;
    173 
    174    case PROGRAM_CONSTANT:
    175    case PROGRAM_STATE_VAR:
    176    case PROGRAM_NAMED_PARAM:
    177    case PROGRAM_UNIFORM:
    178       src = i915_emit_param4fv(p,
    179 	 &program->Base.Parameters->ParameterValues[source->Index][0].f);
    180       break;
    181 
    182    default:
    183       i915_program_error(p, "Bad source->File: %d", source->File);
    184       return 0;
    185    }
    186 
    187    src = swizzle(src,
    188                  GET_SWZ(source->Swizzle, 0),
    189                  GET_SWZ(source->Swizzle, 1),
    190                  GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
    191 
    192    if (source->Negate)
    193       src = negate(src,
    194                    GET_BIT(source->Negate, 0),
    195                    GET_BIT(source->Negate, 1),
    196                    GET_BIT(source->Negate, 2),
    197                    GET_BIT(source->Negate, 3));
    198 
    199    return src;
    200 }
    201 
    202 
    203 static GLuint
    204 get_result_vector(struct i915_fragment_program *p,
    205                   const struct prog_instruction *inst)
    206 {
    207    switch (inst->DstReg.File) {
    208    case PROGRAM_OUTPUT:
    209       switch (inst->DstReg.Index) {
    210       case FRAG_RESULT_COLOR:
    211       case FRAG_RESULT_DATA0:
    212          return UREG(REG_TYPE_OC, 0);
    213       case FRAG_RESULT_DEPTH:
    214          p->depth_written = 1;
    215          return UREG(REG_TYPE_OD, 0);
    216       default:
    217          i915_program_error(p, "Bad inst->DstReg.Index: %d",
    218 			    inst->DstReg.Index);
    219          return 0;
    220       }
    221    case PROGRAM_TEMPORARY:
    222       return UREG(REG_TYPE_R, inst->DstReg.Index);
    223    default:
    224       i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File);
    225       return 0;
    226    }
    227 }
    228 
    229 static GLuint
    230 get_result_flags(const struct prog_instruction *inst)
    231 {
    232    GLuint flags = 0;
    233 
    234    if (inst->SaturateMode == SATURATE_ZERO_ONE)
    235       flags |= A0_DEST_SATURATE;
    236    if (inst->DstReg.WriteMask & WRITEMASK_X)
    237       flags |= A0_DEST_CHANNEL_X;
    238    if (inst->DstReg.WriteMask & WRITEMASK_Y)
    239       flags |= A0_DEST_CHANNEL_Y;
    240    if (inst->DstReg.WriteMask & WRITEMASK_Z)
    241       flags |= A0_DEST_CHANNEL_Z;
    242    if (inst->DstReg.WriteMask & WRITEMASK_W)
    243       flags |= A0_DEST_CHANNEL_W;
    244 
    245    return flags;
    246 }
    247 
    248 static GLuint
    249 translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
    250 {
    251    switch (bit) {
    252    case TEXTURE_1D_INDEX:
    253       return D0_SAMPLE_TYPE_2D;
    254    case TEXTURE_2D_INDEX:
    255       return D0_SAMPLE_TYPE_2D;
    256    case TEXTURE_RECT_INDEX:
    257       return D0_SAMPLE_TYPE_2D;
    258    case TEXTURE_3D_INDEX:
    259       return D0_SAMPLE_TYPE_VOLUME;
    260    case TEXTURE_CUBE_INDEX:
    261       return D0_SAMPLE_TYPE_CUBE;
    262    default:
    263       i915_program_error(p, "TexSrcBit: %d", bit);
    264       return 0;
    265    }
    266 }
    267 
    268 #define EMIT_TEX( OP )						\
    269 do {								\
    270    GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget );	\
    271    const struct gl_fragment_program *program = &p->FragProg;	\
    272    GLuint unit = program->Base.SamplerUnits[inst->TexSrcUnit];	\
    273    GLuint sampler = i915_emit_decl(p, REG_TYPE_S,		\
    274 				   unit, dim);			\
    275    GLuint coord = src_vector( p, &inst->SrcReg[0], program);	\
    276    /* Texel lookup */						\
    277 								\
    278    i915_emit_texld( p, get_live_regs(p, inst),						\
    279 	       get_result_vector( p, inst ),			\
    280 	       get_result_flags( inst ),			\
    281 	       sampler,						\
    282 	       coord,						\
    283 	       OP);						\
    284 } while (0)
    285 
    286 #define EMIT_ARITH( OP, N )						\
    287 do {									\
    288    i915_emit_arith( p,							\
    289 	       OP,							\
    290 	       get_result_vector( p, inst ), 				\
    291 	       get_result_flags( inst ), 0,			\
    292 	       (N<1)?0:src_vector( p, &inst->SrcReg[0], program),	\
    293 	       (N<2)?0:src_vector( p, &inst->SrcReg[1], program),	\
    294 	       (N<3)?0:src_vector( p, &inst->SrcReg[2], program));	\
    295 } while (0)
    296 
    297 #define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
    298 #define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
    299 #define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
    300 
    301 /*
    302  * TODO: consider moving this into core
    303  */
    304 static bool calc_live_regs( struct i915_fragment_program *p )
    305 {
    306     const struct gl_fragment_program *program = &p->FragProg;
    307     GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1);
    308     uint8_t live_components[I915_MAX_TEMPORARY] = { 0, };
    309     GLint i;
    310 
    311     for (i = program->Base.NumInstructions - 1; i >= 0; i--) {
    312         struct prog_instruction *inst = &program->Base.Instructions[i];
    313         int opArgs = _mesa_num_inst_src_regs(inst->Opcode);
    314         int a;
    315 
    316         /* Register is written to: unmark as live for this and preceeding ops */
    317         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
    318 	    if (inst->DstReg.Index >= I915_MAX_TEMPORARY)
    319 	       return false;
    320 
    321             live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
    322             if (live_components[inst->DstReg.Index] == 0)
    323                 regsUsed &= ~(1 << inst->DstReg.Index);
    324         }
    325 
    326         for (a = 0; a < opArgs; a++) {
    327             /* Register is read from: mark as live for this and preceeding ops */
    328             if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
    329                 unsigned c;
    330 
    331 		if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY)
    332 		   return false;
    333 
    334                 regsUsed |= 1 << inst->SrcReg[a].Index;
    335 
    336                 for (c = 0; c < 4; c++) {
    337                     const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c);
    338 
    339                     if (field <= SWIZZLE_W)
    340                         live_components[inst->SrcReg[a].Index] |= (1U << field);
    341                 }
    342             }
    343         }
    344 
    345         p->usedRegs[i] = regsUsed;
    346     }
    347 
    348     return true;
    349 }
    350 
    351 static GLuint get_live_regs( struct i915_fragment_program *p,
    352                              const struct prog_instruction *inst )
    353 {
    354     const struct gl_fragment_program *program = &p->FragProg;
    355     GLuint nr = inst - program->Base.Instructions;
    356 
    357     return p->usedRegs[nr];
    358 }
    359 
    360 
    361 /* Possible concerns:
    362  *
    363  * SIN, COS -- could use another taylor step?
    364  * LIT      -- results seem a little different to sw mesa
    365  * LOG      -- different to mesa on negative numbers, but this is conformant.
    366  *
    367  * Parse failures -- Mesa doesn't currently give a good indication
    368  * internally whether a particular program string parsed or not.  This
    369  * can lead to confusion -- hopefully we cope with it ok now.
    370  *
    371  */
    372 static void
    373 upload_program(struct i915_fragment_program *p)
    374 {
    375    const struct gl_fragment_program *program = &p->FragProg;
    376    const struct prog_instruction *inst = program->Base.Instructions;
    377 
    378    if (INTEL_DEBUG & DEBUG_WM)
    379       _mesa_print_program(&program->Base);
    380 
    381    /* Is this a parse-failed program?  Ensure a valid program is
    382     * loaded, as the flagging of an error isn't sufficient to stop
    383     * this being uploaded to hardware.
    384     */
    385    if (inst[0].Opcode == OPCODE_END) {
    386       GLuint tmp = i915_get_utemp(p);
    387       i915_emit_arith(p,
    388                       A0_MOV,
    389                       UREG(REG_TYPE_OC, 0),
    390                       A0_DEST_CHANNEL_ALL, 0,
    391                       swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
    392       return;
    393    }
    394 
    395    if (program->Base.NumInstructions > I915_MAX_INSN) {
    396       i915_program_error(p, "Exceeded max instructions (%d out of %d)",
    397 			 program->Base.NumInstructions, I915_MAX_INSN);
    398       return;
    399    }
    400 
    401    /* Not always needed:
    402     */
    403    if (!calc_live_regs(p)) {
    404       i915_program_error(p, "Could not allocate registers");
    405       return;
    406    }
    407 
    408    while (1) {
    409       GLuint src0, src1, src2, flags;
    410       GLuint tmp = 0, dst, consts0 = 0, consts1 = 0;
    411 
    412       switch (inst->Opcode) {
    413       case OPCODE_ABS:
    414          src0 = src_vector(p, &inst->SrcReg[0], program);
    415          i915_emit_arith(p,
    416                          A0_MAX,
    417                          get_result_vector(p, inst),
    418                          get_result_flags(inst), 0,
    419                          src0, negate(src0, 1, 1, 1, 1), 0);
    420          break;
    421 
    422       case OPCODE_ADD:
    423          EMIT_2ARG_ARITH(A0_ADD);
    424          break;
    425 
    426       case OPCODE_CMP:
    427          src0 = src_vector(p, &inst->SrcReg[0], program);
    428          src1 = src_vector(p, &inst->SrcReg[1], program);
    429          src2 = src_vector(p, &inst->SrcReg[2], program);
    430          i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1);   /* NOTE: order of src2, src1 */
    431          break;
    432 
    433       case OPCODE_COS:
    434          src0 = src_vector(p, &inst->SrcReg[0], program);
    435          tmp = i915_get_utemp(p);
    436 	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
    437 	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
    438 
    439 	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
    440          i915_emit_arith(p,
    441                          A0_MAD,
    442                          tmp, A0_DEST_CHANNEL_X, 0,
    443                          src0,
    444 			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
    445 			 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */
    446 
    447          i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
    448 
    449 	 i915_emit_arith(p,
    450 			 A0_MAD,
    451 			 tmp, A0_DEST_CHANNEL_X, 0,
    452 			 tmp,
    453 			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
    454 			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
    455 
    456 	 /* Compute COS with the same calculation used for SIN, but a
    457 	  * different source range has been mapped to [-1,1] this time.
    458 	  */
    459 
    460 	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
    461 	 i915_emit_arith(p,
    462                          A0_MAX,
    463 			 tmp, A0_DEST_CHANNEL_Y, 0,
    464 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    465 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
    466 			 0);
    467 
    468 	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
    469 	 i915_emit_arith(p,
    470 			 A0_MUL,
    471 			 tmp, A0_DEST_CHANNEL_Y, 0,
    472 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    473 			 tmp,
    474 			 0);
    475 
    476 	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
    477          i915_emit_arith(p,
    478                          A0_DP3,
    479                          tmp, A0_DEST_CHANNEL_X, 0,
    480 			 tmp,
    481                          swizzle(consts1, X, Y, ZERO, ZERO),
    482 			 0);
    483 
    484 	 /* tmp.x now contains a first approximation (y).  Now, weight it
    485 	  * against tmp.y**2 to get closer.
    486 	  */
    487 	 i915_emit_arith(p,
    488                          A0_MAX,
    489 			 tmp, A0_DEST_CHANNEL_Y, 0,
    490 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    491 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
    492 			 0);
    493 
    494 	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
    495 	 i915_emit_arith(p,
    496 			 A0_MAD,
    497 			 tmp, A0_DEST_CHANNEL_Y, 0,
    498 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    499 			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
    500 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
    501 
    502 	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
    503 	 i915_emit_arith(p,
    504 			 A0_MAD,
    505                          get_result_vector(p, inst),
    506                          get_result_flags(inst), 0,
    507 			 swizzle(consts1, W, W, W, W),
    508 			 swizzle(tmp, Y, Y, Y, Y),
    509 			 swizzle(tmp, X, X, X, X));
    510          break;
    511 
    512       case OPCODE_DP2:
    513          src0 = src_vector(p, &inst->SrcReg[0], program);
    514          src1 = src_vector(p, &inst->SrcReg[1], program);
    515 	 i915_emit_arith(p,
    516 			 A0_DP3,
    517                          get_result_vector(p, inst),
    518                          get_result_flags(inst), 0,
    519 			 swizzle(src0, X, Y, ZERO, ZERO),
    520 			 swizzle(src1, X, Y, ZERO, ZERO),
    521 			 0);
    522          break;
    523 
    524       case OPCODE_DP3:
    525          EMIT_2ARG_ARITH(A0_DP3);
    526          break;
    527 
    528       case OPCODE_DP4:
    529          EMIT_2ARG_ARITH(A0_DP4);
    530          break;
    531 
    532       case OPCODE_DPH:
    533          src0 = src_vector(p, &inst->SrcReg[0], program);
    534          src1 = src_vector(p, &inst->SrcReg[1], program);
    535 
    536          i915_emit_arith(p,
    537                          A0_DP4,
    538                          get_result_vector(p, inst),
    539                          get_result_flags(inst), 0,
    540                          swizzle(src0, X, Y, Z, ONE), src1, 0);
    541          break;
    542 
    543       case OPCODE_DST:
    544          src0 = src_vector(p, &inst->SrcReg[0], program);
    545          src1 = src_vector(p, &inst->SrcReg[1], program);
    546 
    547          /* result[0] = 1    * 1;
    548           * result[1] = a[1] * b[1];
    549           * result[2] = a[2] * 1;
    550           * result[3] = 1    * b[3];
    551           */
    552          i915_emit_arith(p,
    553                          A0_MUL,
    554                          get_result_vector(p, inst),
    555                          get_result_flags(inst), 0,
    556                          swizzle(src0, ONE, Y, Z, ONE),
    557                          swizzle(src1, ONE, Y, ONE, W), 0);
    558          break;
    559 
    560       case OPCODE_EX2:
    561          src0 = src_vector(p, &inst->SrcReg[0], program);
    562 
    563          i915_emit_arith(p,
    564                          A0_EXP,
    565                          get_result_vector(p, inst),
    566                          get_result_flags(inst), 0,
    567                          swizzle(src0, X, X, X, X), 0, 0);
    568          break;
    569 
    570       case OPCODE_FLR:
    571          EMIT_1ARG_ARITH(A0_FLR);
    572          break;
    573 
    574       case OPCODE_TRUNC:
    575 	 EMIT_1ARG_ARITH(A0_TRC);
    576 	 break;
    577 
    578       case OPCODE_FRC:
    579          EMIT_1ARG_ARITH(A0_FRC);
    580          break;
    581 
    582       case OPCODE_KIL:
    583          src0 = src_vector(p, &inst->SrcReg[0], program);
    584          tmp = i915_get_utemp(p);
    585 
    586          i915_emit_texld(p, get_live_regs(p, inst),
    587                          tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
    588                          0, src0, T0_TEXKILL);
    589          break;
    590 
    591       case OPCODE_KIL_NV:
    592 	 if (inst->DstReg.CondMask == COND_TR) {
    593 	    tmp = i915_get_utemp(p);
    594 
    595 	    /* The KIL instruction discards the fragment if any component of
    596 	     * the source is < 0.  Emit an immediate operand of {-1}.xywz.
    597 	     */
    598 	    i915_emit_texld(p, get_live_regs(p, inst),
    599 			    tmp, A0_DEST_CHANNEL_ALL,
    600 			    0, /* use a dummy dest reg */
    601 			    negate(swizzle(tmp, ONE, ONE, ONE, ONE),
    602 				   1, 1, 1, 1),
    603 			    T0_TEXKILL);
    604 	 } else {
    605 	    p->error = 1;
    606 	    i915_program_error(p, "Unsupported KIL_NV condition code: %d",
    607 			       inst->DstReg.CondMask);
    608 	 }
    609 	 break;
    610 
    611       case OPCODE_LG2:
    612          src0 = src_vector(p, &inst->SrcReg[0], program);
    613 
    614          i915_emit_arith(p,
    615                          A0_LOG,
    616                          get_result_vector(p, inst),
    617                          get_result_flags(inst), 0,
    618                          swizzle(src0, X, X, X, X), 0, 0);
    619          break;
    620 
    621       case OPCODE_LIT:
    622          src0 = src_vector(p, &inst->SrcReg[0], program);
    623          tmp = i915_get_utemp(p);
    624 
    625          /* tmp = max( a.xyzw, a.00zw )
    626           * XXX: Clamp tmp.w to -128..128
    627           * tmp.y = log(tmp.y)
    628           * tmp.y = tmp.w * tmp.y
    629           * tmp.y = exp(tmp.y)
    630           * result = cmp (a.11-x1, a.1x01, a.1xy1 )
    631           */
    632          i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
    633                          src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
    634 
    635          i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
    636                          swizzle(tmp, Y, Y, Y, Y), 0, 0);
    637 
    638          i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
    639                          swizzle(tmp, ZERO, Y, ZERO, ZERO),
    640                          swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
    641 
    642          i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
    643                          swizzle(tmp, Y, Y, Y, Y), 0, 0);
    644 
    645          i915_emit_arith(p, A0_CMP,
    646                          get_result_vector(p, inst),
    647                          get_result_flags(inst), 0,
    648                          negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
    649                          swizzle(tmp, ONE, X, ZERO, ONE),
    650                          swizzle(tmp, ONE, X, Y, ONE));
    651 
    652          break;
    653 
    654       case OPCODE_LRP:
    655          src0 = src_vector(p, &inst->SrcReg[0], program);
    656          src1 = src_vector(p, &inst->SrcReg[1], program);
    657          src2 = src_vector(p, &inst->SrcReg[2], program);
    658          flags = get_result_flags(inst);
    659          tmp = i915_get_utemp(p);
    660 
    661          /* b*a + c*(1-a)
    662           *
    663           * b*a + c - ca
    664           *
    665           * tmp = b*a + c,
    666           * result = (-c)*a + tmp
    667           */
    668          i915_emit_arith(p, A0_MAD, tmp,
    669                          flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
    670 
    671          i915_emit_arith(p, A0_MAD,
    672                          get_result_vector(p, inst),
    673                          flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
    674          break;
    675 
    676       case OPCODE_MAD:
    677          EMIT_3ARG_ARITH(A0_MAD);
    678          break;
    679 
    680       case OPCODE_MAX:
    681          EMIT_2ARG_ARITH(A0_MAX);
    682          break;
    683 
    684       case OPCODE_MIN:
    685          src0 = src_vector(p, &inst->SrcReg[0], program);
    686          src1 = src_vector(p, &inst->SrcReg[1], program);
    687          tmp = i915_get_utemp(p);
    688          flags = get_result_flags(inst);
    689 
    690          i915_emit_arith(p,
    691                          A0_MAX,
    692                          tmp, flags & A0_DEST_CHANNEL_ALL, 0,
    693                          negate(src0, 1, 1, 1, 1),
    694                          negate(src1, 1, 1, 1, 1), 0);
    695 
    696          i915_emit_arith(p,
    697                          A0_MOV,
    698                          get_result_vector(p, inst),
    699                          flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
    700          break;
    701 
    702       case OPCODE_MOV:
    703          EMIT_1ARG_ARITH(A0_MOV);
    704          break;
    705 
    706       case OPCODE_MUL:
    707          EMIT_2ARG_ARITH(A0_MUL);
    708          break;
    709 
    710       case OPCODE_POW:
    711          src0 = src_vector(p, &inst->SrcReg[0], program);
    712          src1 = src_vector(p, &inst->SrcReg[1], program);
    713          tmp = i915_get_utemp(p);
    714          flags = get_result_flags(inst);
    715 
    716          /* XXX: masking on intermediate values, here and elsewhere.
    717           */
    718          i915_emit_arith(p,
    719                          A0_LOG,
    720                          tmp, A0_DEST_CHANNEL_X, 0,
    721                          swizzle(src0, X, X, X, X), 0, 0);
    722 
    723          i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
    724 
    725 
    726          i915_emit_arith(p,
    727                          A0_EXP,
    728                          get_result_vector(p, inst),
    729                          flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
    730 
    731          break;
    732 
    733       case OPCODE_RCP:
    734          src0 = src_vector(p, &inst->SrcReg[0], program);
    735 
    736          i915_emit_arith(p,
    737                          A0_RCP,
    738                          get_result_vector(p, inst),
    739                          get_result_flags(inst), 0,
    740                          swizzle(src0, X, X, X, X), 0, 0);
    741          break;
    742 
    743       case OPCODE_RSQ:
    744 
    745          src0 = src_vector(p, &inst->SrcReg[0], program);
    746 
    747          i915_emit_arith(p,
    748                          A0_RSQ,
    749                          get_result_vector(p, inst),
    750                          get_result_flags(inst), 0,
    751                          swizzle(src0, X, X, X, X), 0, 0);
    752          break;
    753 
    754       case OPCODE_SCS:
    755          src0 = src_vector(p, &inst->SrcReg[0], program);
    756          tmp = i915_get_utemp(p);
    757 
    758          /*
    759           * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
    760           * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
    761           * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
    762           * scs.x = DP4 t1, sin_constants
    763           * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
    764           * scs.y = DP4 t1, cos_constants
    765           */
    766          i915_emit_arith(p,
    767                          A0_MUL,
    768                          tmp, A0_DEST_CHANNEL_XY, 0,
    769                          swizzle(src0, X, X, ONE, ONE),
    770                          swizzle(src0, X, ONE, ONE, ONE), 0);
    771 
    772          i915_emit_arith(p,
    773                          A0_MUL,
    774                          tmp, A0_DEST_CHANNEL_ALL, 0,
    775                          swizzle(tmp, X, Y, X, Y),
    776                          swizzle(tmp, X, X, ONE, ONE), 0);
    777 
    778          if (inst->DstReg.WriteMask & WRITEMASK_Y) {
    779             GLuint tmp1;
    780 
    781             if (inst->DstReg.WriteMask & WRITEMASK_X)
    782                tmp1 = i915_get_utemp(p);
    783             else
    784                tmp1 = tmp;
    785 
    786             i915_emit_arith(p,
    787                             A0_MUL,
    788                             tmp1, A0_DEST_CHANNEL_ALL, 0,
    789                             swizzle(tmp, X, Y, Y, W),
    790                             swizzle(tmp, X, Z, ONE, ONE), 0);
    791 
    792             i915_emit_arith(p,
    793                             A0_DP4,
    794                             get_result_vector(p, inst),
    795                             A0_DEST_CHANNEL_Y, 0,
    796                             swizzle(tmp1, W, Z, Y, X),
    797                             i915_emit_const4fv(p, sin_constants), 0);
    798          }
    799 
    800          if (inst->DstReg.WriteMask & WRITEMASK_X) {
    801             i915_emit_arith(p,
    802                             A0_MUL,
    803                             tmp, A0_DEST_CHANNEL_XYZ, 0,
    804                             swizzle(tmp, X, X, Z, ONE),
    805                             swizzle(tmp, Z, ONE, ONE, ONE), 0);
    806 
    807             i915_emit_arith(p,
    808                             A0_DP4,
    809                             get_result_vector(p, inst),
    810                             A0_DEST_CHANNEL_X, 0,
    811                             swizzle(tmp, ONE, Z, Y, X),
    812                             i915_emit_const4fv(p, cos_constants), 0);
    813          }
    814          break;
    815 
    816       case OPCODE_SEQ:
    817 	 tmp = i915_get_utemp(p);
    818 	 flags = get_result_flags(inst);
    819 	 dst = get_result_vector(p, inst);
    820 
    821 	 /* tmp = src1 >= src2 */
    822 	 i915_emit_arith(p,
    823 			 A0_SGE,
    824 			 tmp,
    825 			 flags, 0,
    826 			 src_vector(p, &inst->SrcReg[0], program),
    827 			 src_vector(p, &inst->SrcReg[1], program),
    828 			 0);
    829 	 /* dst = src1 <= src2 */
    830 	 i915_emit_arith(p,
    831 			 A0_SGE,
    832 			 dst,
    833 			 flags, 0,
    834 			 negate(src_vector(p, &inst->SrcReg[0], program),
    835 				1, 1, 1, 1),
    836 			 negate(src_vector(p, &inst->SrcReg[1], program),
    837 				1, 1, 1, 1),
    838 			 0);
    839 	 /* dst = tmp && dst */
    840 	 i915_emit_arith(p,
    841 			 A0_MUL,
    842 			 dst,
    843 			 flags, 0,
    844 			 dst,
    845 			 tmp,
    846 			 0);
    847 	 break;
    848 
    849       case OPCODE_SIN:
    850          src0 = src_vector(p, &inst->SrcReg[0], program);
    851          tmp = i915_get_utemp(p);
    852 	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
    853 	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
    854 
    855 	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
    856          i915_emit_arith(p,
    857                          A0_MAD,
    858                          tmp, A0_DEST_CHANNEL_X, 0,
    859                          src0,
    860 			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
    861 			 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */
    862 
    863          i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
    864 
    865 	 i915_emit_arith(p,
    866 			 A0_MAD,
    867 			 tmp, A0_DEST_CHANNEL_X, 0,
    868 			 tmp,
    869 			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
    870 			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
    871 
    872 	 /* Compute sin using a quadratic and quartic.  It gives continuity
    873 	  * that repeating the Taylor series lacks every 2*pi, and has
    874 	  * reduced error.
    875 	  *
    876 	  * The idea was described at:
    877 	  * http://www.devmaster.net/forums/showthread.php?t=5784
    878 	  */
    879 
    880 	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
    881 	 i915_emit_arith(p,
    882                          A0_MAX,
    883 			 tmp, A0_DEST_CHANNEL_Y, 0,
    884 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    885 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
    886 			 0);
    887 
    888 	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
    889 	 i915_emit_arith(p,
    890 			 A0_MUL,
    891 			 tmp, A0_DEST_CHANNEL_Y, 0,
    892 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    893 			 tmp,
    894 			 0);
    895 
    896 	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
    897          i915_emit_arith(p,
    898                          A0_DP3,
    899                          tmp, A0_DEST_CHANNEL_X, 0,
    900 			 tmp,
    901                          swizzle(consts1, X, Y, ZERO, ZERO),
    902 			 0);
    903 
    904 	 /* tmp.x now contains a first approximation (y).  Now, weight it
    905 	  * against tmp.y**2 to get closer.
    906 	  */
    907 	 i915_emit_arith(p,
    908                          A0_MAX,
    909 			 tmp, A0_DEST_CHANNEL_Y, 0,
    910 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    911 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
    912 			 0);
    913 
    914 	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
    915 	 i915_emit_arith(p,
    916 			 A0_MAD,
    917 			 tmp, A0_DEST_CHANNEL_Y, 0,
    918 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    919 			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
    920 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
    921 
    922 	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
    923 	 i915_emit_arith(p,
    924 			 A0_MAD,
    925                          get_result_vector(p, inst),
    926                          get_result_flags(inst), 0,
    927 			 swizzle(consts1, W, W, W, W),
    928 			 swizzle(tmp, Y, Y, Y, Y),
    929 			 swizzle(tmp, X, X, X, X));
    930 
    931          break;
    932 
    933       case OPCODE_SGE:
    934 	 EMIT_2ARG_ARITH(A0_SGE);
    935 	 break;
    936 
    937       case OPCODE_SGT:
    938 	 i915_emit_arith(p,
    939 			 A0_SLT,
    940 			 get_result_vector( p, inst ),
    941 			 get_result_flags( inst ), 0,
    942 			 negate(src_vector( p, &inst->SrcReg[0], program),
    943 				1, 1, 1, 1),
    944 			 negate(src_vector( p, &inst->SrcReg[1], program),
    945 				1, 1, 1, 1),
    946 			 0);
    947          break;
    948 
    949       case OPCODE_SLE:
    950 	 i915_emit_arith(p,
    951 			 A0_SGE,
    952 			 get_result_vector( p, inst ),
    953 			 get_result_flags( inst ), 0,
    954 			 negate(src_vector( p, &inst->SrcReg[0], program),
    955 				1, 1, 1, 1),
    956 			 negate(src_vector( p, &inst->SrcReg[1], program),
    957 				1, 1, 1, 1),
    958 			 0);
    959          break;
    960 
    961       case OPCODE_SLT:
    962          EMIT_2ARG_ARITH(A0_SLT);
    963          break;
    964 
    965       case OPCODE_SNE:
    966 	 tmp = i915_get_utemp(p);
    967 	 flags = get_result_flags(inst);
    968 	 dst = get_result_vector(p, inst);
    969 
    970 	 /* tmp = src1 < src2 */
    971 	 i915_emit_arith(p,
    972 			 A0_SLT,
    973 			 tmp,
    974 			 flags, 0,
    975 			 src_vector(p, &inst->SrcReg[0], program),
    976 			 src_vector(p, &inst->SrcReg[1], program),
    977 			 0);
    978 	 /* dst = src1 > src2 */
    979 	 i915_emit_arith(p,
    980 			 A0_SLT,
    981 			 dst,
    982 			 flags, 0,
    983 			 negate(src_vector(p, &inst->SrcReg[0], program),
    984 				1, 1, 1, 1),
    985 			 negate(src_vector(p, &inst->SrcReg[1], program),
    986 				1, 1, 1, 1),
    987 			 0);
    988 	 /* dst = tmp || dst */
    989 	 i915_emit_arith(p,
    990 			 A0_ADD,
    991 			 dst,
    992 			 flags | A0_DEST_SATURATE, 0,
    993 			 dst,
    994 			 tmp,
    995 			 0);
    996          break;
    997 
    998       case OPCODE_SSG:
    999 	 dst = get_result_vector(p, inst);
   1000 	 flags = get_result_flags(inst);
   1001          src0 = src_vector(p, &inst->SrcReg[0], program);
   1002 	 tmp = i915_get_utemp(p);
   1003 
   1004 	 /* tmp = (src < 0.0) */
   1005 	 i915_emit_arith(p,
   1006 			 A0_SLT,
   1007 			 tmp,
   1008 			 flags, 0,
   1009 			 src0,
   1010 			 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
   1011 			 0);
   1012 
   1013 	 /* dst = (0.0 < src) */
   1014 	 i915_emit_arith(p,
   1015 			 A0_SLT,
   1016 			 dst,
   1017 			 flags, 0,
   1018 			 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
   1019 			 src0,
   1020 			 0);
   1021 
   1022 	 /* dst = (src > 0.0) - (src < 0.0) */
   1023 	 i915_emit_arith(p,
   1024 			 A0_ADD,
   1025 			 dst,
   1026 			 flags, 0,
   1027 			 dst,
   1028 			 negate(tmp, 1, 1, 1, 1),
   1029 			 0);
   1030 
   1031          break;
   1032 
   1033       case OPCODE_SUB:
   1034          src0 = src_vector(p, &inst->SrcReg[0], program);
   1035          src1 = src_vector(p, &inst->SrcReg[1], program);
   1036 
   1037          i915_emit_arith(p,
   1038                          A0_ADD,
   1039                          get_result_vector(p, inst),
   1040                          get_result_flags(inst), 0,
   1041                          src0, negate(src1, 1, 1, 1, 1), 0);
   1042          break;
   1043 
   1044       case OPCODE_SWZ:
   1045          EMIT_1ARG_ARITH(A0_MOV);       /* extended swizzle handled natively */
   1046          break;
   1047 
   1048       case OPCODE_TEX:
   1049          EMIT_TEX(T0_TEXLD);
   1050          break;
   1051 
   1052       case OPCODE_TXB:
   1053          EMIT_TEX(T0_TEXLDB);
   1054          break;
   1055 
   1056       case OPCODE_TXP:
   1057          EMIT_TEX(T0_TEXLDP);
   1058          break;
   1059 
   1060       case OPCODE_XPD:
   1061          /* Cross product:
   1062           *      result.x = src0.y * src1.z - src0.z * src1.y;
   1063           *      result.y = src0.z * src1.x - src0.x * src1.z;
   1064           *      result.z = src0.x * src1.y - src0.y * src1.x;
   1065           *      result.w = undef;
   1066           */
   1067          src0 = src_vector(p, &inst->SrcReg[0], program);
   1068          src1 = src_vector(p, &inst->SrcReg[1], program);
   1069          tmp = i915_get_utemp(p);
   1070 
   1071          i915_emit_arith(p,
   1072                          A0_MUL,
   1073                          tmp, A0_DEST_CHANNEL_ALL, 0,
   1074                          swizzle(src0, Z, X, Y, ONE),
   1075                          swizzle(src1, Y, Z, X, ONE), 0);
   1076 
   1077          i915_emit_arith(p,
   1078                          A0_MAD,
   1079                          get_result_vector(p, inst),
   1080                          get_result_flags(inst), 0,
   1081                          swizzle(src0, Y, Z, X, ONE),
   1082                          swizzle(src1, Z, X, Y, ONE),
   1083                          negate(tmp, 1, 1, 1, 0));
   1084          break;
   1085 
   1086       case OPCODE_END:
   1087          return;
   1088 
   1089       case OPCODE_BGNLOOP:
   1090       case OPCODE_BGNSUB:
   1091       case OPCODE_BRA:
   1092       case OPCODE_BRK:
   1093       case OPCODE_CAL:
   1094       case OPCODE_CONT:
   1095       case OPCODE_DDX:
   1096       case OPCODE_DDY:
   1097       case OPCODE_ELSE:
   1098       case OPCODE_ENDIF:
   1099       case OPCODE_ENDLOOP:
   1100       case OPCODE_ENDSUB:
   1101       case OPCODE_IF:
   1102       case OPCODE_RET:
   1103 	 p->error = 1;
   1104 	 i915_program_error(p, "Unsupported opcode: %s",
   1105 			    _mesa_opcode_string(inst->Opcode));
   1106 	 return;
   1107 
   1108       case OPCODE_EXP:
   1109       case OPCODE_LOG:
   1110 	 /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in
   1111 	  * prog_instruction.h, but apparently GLSL doesn't ever emit them.
   1112 	  * Instead, it translates to EX2 or LG2.
   1113 	  */
   1114       case OPCODE_TXD:
   1115       case OPCODE_TXL:
   1116 	 /* These opcodes are claimed by GLSL in prog_instruction.h, but
   1117 	  * only NV_vp/fp appears to emit them.
   1118 	  */
   1119       default:
   1120          i915_program_error(p, "bad opcode: %s",
   1121 			    _mesa_opcode_string(inst->Opcode));
   1122          return;
   1123       }
   1124 
   1125       inst++;
   1126       i915_release_utemps(p);
   1127    }
   1128 }
   1129 
   1130 /* Rather than trying to intercept and jiggle depth writes during
   1131  * emit, just move the value into its correct position at the end of
   1132  * the program:
   1133  */
   1134 static void
   1135 fixup_depth_write(struct i915_fragment_program *p)
   1136 {
   1137    if (p->depth_written) {
   1138       GLuint depth = UREG(REG_TYPE_OD, 0);
   1139 
   1140       i915_emit_arith(p,
   1141                       A0_MOV,
   1142                       depth, A0_DEST_CHANNEL_W, 0,
   1143                       swizzle(depth, X, Y, Z, Z), 0, 0);
   1144    }
   1145 }
   1146 
   1147 
   1148 static void
   1149 check_wpos(struct i915_fragment_program *p)
   1150 {
   1151    GLbitfield64 inputs = p->FragProg.Base.InputsRead;
   1152    GLint i;
   1153 
   1154    p->wpos_tex = -1;
   1155 
   1156    for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
   1157       if (inputs & (FRAG_BIT_TEX(i) | FRAG_BIT_VAR(i)))
   1158          continue;
   1159       else if (inputs & FRAG_BIT_WPOS) {
   1160          p->wpos_tex = i;
   1161          inputs &= ~FRAG_BIT_WPOS;
   1162       }
   1163    }
   1164 
   1165    if (inputs & FRAG_BIT_WPOS) {
   1166       i915_program_error(p, "No free texcoord for wpos value");
   1167    }
   1168 }
   1169 
   1170 
   1171 static void
   1172 translate_program(struct i915_fragment_program *p)
   1173 {
   1174    struct i915_context *i915 = I915_CONTEXT(p->ctx);
   1175 
   1176    if (INTEL_DEBUG & DEBUG_WM) {
   1177       printf("fp:\n");
   1178       _mesa_print_program(&p->FragProg.Base);
   1179       printf("\n");
   1180    }
   1181 
   1182    i915_init_program(i915, p);
   1183    check_wpos(p);
   1184    upload_program(p);
   1185    fixup_depth_write(p);
   1186    i915_fini_program(p);
   1187 
   1188    p->translated = 1;
   1189 }
   1190 
   1191 
   1192 static void
   1193 track_params(struct i915_fragment_program *p)
   1194 {
   1195    GLint i;
   1196 
   1197    if (p->nr_params)
   1198       _mesa_load_state_parameters(p->ctx, p->FragProg.Base.Parameters);
   1199 
   1200    for (i = 0; i < p->nr_params; i++) {
   1201       GLint reg = p->param[i].reg;
   1202       COPY_4V(p->constant[reg], p->param[i].values);
   1203    }
   1204 
   1205    p->params_uptodate = 1;
   1206    p->on_hardware = 0;          /* overkill */
   1207 }
   1208 
   1209 
   1210 static void
   1211 i915BindProgram(struct gl_context * ctx, GLenum target, struct gl_program *prog)
   1212 {
   1213    if (target == GL_FRAGMENT_PROGRAM_ARB) {
   1214       struct i915_context *i915 = I915_CONTEXT(ctx);
   1215       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
   1216 
   1217       if (i915->current_program == p)
   1218          return;
   1219 
   1220       if (i915->current_program) {
   1221          i915->current_program->on_hardware = 0;
   1222          i915->current_program->params_uptodate = 0;
   1223       }
   1224 
   1225       i915->current_program = p;
   1226 
   1227       assert(p->on_hardware == 0);
   1228       assert(p->params_uptodate == 0);
   1229 
   1230    }
   1231 }
   1232 
   1233 static struct gl_program *
   1234 i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id)
   1235 {
   1236    switch (target) {
   1237    case GL_VERTEX_PROGRAM_ARB:
   1238       return _mesa_init_vertex_program(ctx, CALLOC_STRUCT(gl_vertex_program),
   1239                                        target, id);
   1240 
   1241    case GL_FRAGMENT_PROGRAM_ARB:{
   1242          struct i915_fragment_program *prog =
   1243             CALLOC_STRUCT(i915_fragment_program);
   1244          if (prog) {
   1245             i915_init_program(I915_CONTEXT(ctx), prog);
   1246 
   1247             return _mesa_init_fragment_program(ctx, &prog->FragProg,
   1248                                                target, id);
   1249          }
   1250          else
   1251             return NULL;
   1252       }
   1253 
   1254    default:
   1255       /* Just fallback:
   1256        */
   1257       return _mesa_new_program(ctx, target, id);
   1258    }
   1259 }
   1260 
   1261 static void
   1262 i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog)
   1263 {
   1264    if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
   1265       struct i915_context *i915 = I915_CONTEXT(ctx);
   1266       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
   1267 
   1268       if (i915->current_program == p)
   1269          i915->current_program = 0;
   1270    }
   1271 
   1272    _mesa_delete_program(ctx, prog);
   1273 }
   1274 
   1275 
   1276 static GLboolean
   1277 i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog)
   1278 {
   1279    if (target == GL_FRAGMENT_PROGRAM_ARB) {
   1280       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
   1281 
   1282       if (!p->translated)
   1283          translate_program(p);
   1284 
   1285       return !p->error;
   1286    }
   1287    else
   1288       return true;
   1289 }
   1290 
   1291 static GLboolean
   1292 i915ProgramStringNotify(struct gl_context * ctx,
   1293                         GLenum target, struct gl_program *prog)
   1294 {
   1295    if (target == GL_FRAGMENT_PROGRAM_ARB) {
   1296       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
   1297       p->translated = 0;
   1298    }
   1299 
   1300    (void) _tnl_program_string(ctx, target, prog);
   1301 
   1302    /* XXX check if program is legal, within limits */
   1303    return true;
   1304 }
   1305 
   1306 static void
   1307 i915SamplerUniformChange(struct gl_context *ctx,
   1308                          GLenum target, struct gl_program *prog)
   1309 {
   1310    i915ProgramStringNotify(ctx, target, prog);
   1311 }
   1312 
   1313 void
   1314 i915_update_program(struct gl_context *ctx)
   1315 {
   1316    struct intel_context *intel = intel_context(ctx);
   1317    struct i915_context *i915 = i915_context(&intel->ctx);
   1318    struct i915_fragment_program *fp =
   1319       (struct i915_fragment_program *) ctx->FragmentProgram._Current;
   1320 
   1321    if (i915->current_program != fp) {
   1322       if (i915->current_program) {
   1323          i915->current_program->on_hardware = 0;
   1324          i915->current_program->params_uptodate = 0;
   1325       }
   1326 
   1327       i915->current_program = fp;
   1328    }
   1329 
   1330    if (!fp->translated)
   1331       translate_program(fp);
   1332 
   1333    FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error);
   1334 }
   1335 
/**
 * Validate the current fragment program against the vertex setup.
 *
 * Translates the program if it has not been translated yet, rebuilds the
 * list of emitted vertex attributes from the program's InputsRead mask,
 * and accumulates the matching S2 (texcoord formats) and S4 (vertex
 * format) words.  When either word differs from the value stored in the
 * context registers, the TNL vertex attributes are reinstalled and the
 * registers updated.  Finally the program parameters are refreshed and
 * the program uploaded if their respective flags say they are stale.
 *
 * NOTE(review): EMIT_ATTR, EMIT_PAD, EMIT_SZ and SZ_TO_HW are macros
 * defined outside this part of the file; they presumably append to
 * intel->vertex_attrs and update the local s4/offset variables -- confirm
 * before renaming any local here.
 */
void
i915ValidateFragmentProgram(struct i915_context *i915)
{
   struct gl_context *ctx = &i915->intel.ctx;
   struct intel_context *intel = intel_context(ctx);
   TNLcontext *tnl = TNL_CONTEXT(ctx);
   struct vertex_buffer *VB = &tnl->vb;

   struct i915_fragment_program *p =
      (struct i915_fragment_program *) ctx->FragmentProgram._Current;

   const GLbitfield64 inputsRead = p->FragProg.Base.InputsRead;
   /* Start from the current LIS4 word with the S4_VFMT_MASK bits cleared,
    * and from "no texcoords" for S2; both are rebuilt below.
    */
   GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
   GLuint s2 = S2_TEXCOORD_NONE;
   int i, offset = 0;

   /* Important:
    */
   VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;

   /* Translation sets p->wpos_tex, which is consulted below. */
   if (!p->translated)
      translate_program(p);

   intel->vertex_attr_count = 0;
   intel->wpos_offset = 0;
   intel->coloroffset = 0;
   intel->specoffset = 0;

   /* Emit a 4-component position when any texcoord input is read or a
    * texcoord slot has been reserved for wpos; otherwise 3 components.
    */
   if (inputsRead & FRAG_BITS_TEX_ANY || p->wpos_tex != -1) {
      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
   }
   else {
      EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
   }

   /* Handle gl_PointSize builtin var here */
   if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
      EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4);

   /* Color offsets are recorded as offset / 4 (offset is advanced with
    * byte-sized steps such as 16, 12 and 4 above).
    */
   if (inputsRead & FRAG_BIT_COL0) {
      intel->coloroffset = offset / 4;
      EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
   }

   if (inputsRead & FRAG_BIT_COL1) {
       intel->specoffset = offset / 4;
       EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
   }

   if ((inputsRead & FRAG_BIT_FOGC)) {
      EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
   }

   /* Each texcoord unit carries, in priority order: the texcoord the
    * program reads, the varying it reads, or the wpos copy if this unit
    * was reserved for it.
    */
   for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
      if (inputsRead & FRAG_BIT_TEX(i)) {
         int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;

         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));

         EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
      }
      else if (inputsRead & FRAG_BIT_VAR(i)) {
         int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;

         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));

         EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
      }
      else if (i == p->wpos_tex) {
	 int wpos_size = 4 * sizeof(float);
         /* If WPOS is required, duplicate the XYZ position data in an
          * unused texture coordinate:
          */
         /* NOTE(review): wpos_size is a byte count, whereas the branches
          * above hand SZ_TO_HW a component count -- confirm the macro
          * still yields the intended format for this value.
          */
         s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
         s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));

         intel->wpos_offset = offset;
         EMIT_PAD(wpos_size);
      }
   }

   /* Only touch hardware state when the computed layout actually changed. */
   if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
       s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
      int k;

      I915_STATECHANGE(i915, I915_UPLOAD_CTX);

      /* Must do this *after* statechange, so as not to affect
       * buffered vertices reliant on the old state:
       */
      intel->vertex_size = _tnl_install_attrs(&intel->ctx,
                                              intel->vertex_attrs,
                                              intel->vertex_attr_count,
                                              intel->ViewportMatrix.m, 0);

      /* Round the primitive start offset up to a multiple of the new
       * vertex size (vertex_size is divided by 4 only after this).
       */
      assert(intel->prim.current_offset == intel->prim.start_offset);
      intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
      intel->prim.current_offset = intel->prim.start_offset;

      intel->vertex_size >>= 2;

      i915->state.Ctx[I915_CTXREG_LIS2] = s2;
      i915->state.Ctx[I915_CTXREG_LIS4] = s4;

      k = intel->vtbl.check_vertex_size(intel, intel->vertex_size);
      assert(k);
   }

   if (!p->params_uptodate)
      track_params(p);

   if (!p->on_hardware)
      i915_upload_program(i915, p);

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("i915:\n");
      i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
   }
}
   1457 
   1458 void
   1459 i915InitFragProgFuncs(struct dd_function_table *functions)
   1460 {
   1461    functions->BindProgram = i915BindProgram;
   1462    functions->NewProgram = i915NewProgram;
   1463    functions->DeleteProgram = i915DeleteProgram;
   1464    functions->IsProgramNative = i915IsProgramNative;
   1465    functions->ProgramStringNotify = i915ProgramStringNotify;
   1466    functions->SamplerUniformChange = i915SamplerUniformChange;
   1467 }
   1468