Home | History | Annotate | Download | only in i915
      1 /**************************************************************************
      2  *
      3  * Copyright 2003 VMware, Inc.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 #include "main/glheader.h"
     29 #include "main/macros.h"
     30 #include "main/enums.h"
     31 
     32 #include "program/prog_instruction.h"
     33 #include "program/prog_parameter.h"
     34 #include "program/program.h"
     35 #include "program/programopt.h"
     36 #include "program/prog_print.h"
     37 
     38 #include "tnl/tnl.h"
     39 #include "tnl/t_context.h"
     40 
     41 #include "intel_batchbuffer.h"
     42 
     43 #include "i915_reg.h"
     44 #include "i915_context.h"
     45 #include "i915_program.h"
     46 
     47 static const GLfloat sin_quad_constants[2][4] = {
     48    {
     49       2.0,
     50       -1.0,
     51       .5,
     52       .75
     53    },
     54    {
     55       4.0,
     56       -4.0,
     57       1.0 / (2.0 * M_PI),
     58       .2225
     59    }
     60 };
     61 
     62 static const GLfloat sin_constants[4] = { 1.0,
     63    -1.0 / (3 * 2 * 1),
     64    1.0 / (5 * 4 * 3 * 2 * 1),
     65    -1.0 / (7 * 6 * 5 * 4 * 3 * 2 * 1)
     66 };
     67 
     68 /* 1, -1/2!, 1/4!, -1/6! */
     69 static const GLfloat cos_constants[4] = { 1.0,
     70    -1.0 / (2 * 1),
     71    1.0 / (4 * 3 * 2 * 1),
     72    -1.0 / (6 * 5 * 4 * 3 * 2 * 1)
     73 };
     74 
     75 /* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */
     76 #define TEXCOORD_TEX (0<<7)
     77 #define TEXCOORD_VAR (1<<7)
     78 
     79 static unsigned
     80 get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord)
     81 {
     82    for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
     83       if (p->texcoord_mapping[i] == texcoord)
     84          return i;
     85    }
     86 
     87    /* blah */
     88    return p->ctx->Const.MaxTextureCoordUnits - 1;
     89 }
     90 
     91 /**
     92  * Retrieve a ureg for the given source register.  Will emit
     93  * constants, apply swizzling and negation as needed.
     94  */
     95 static GLuint
     96 src_vector(struct i915_fragment_program *p,
     97            const struct prog_src_register *source,
     98            const struct gl_program *program)
     99 {
    100    GLuint src;
    101    unsigned unit;
    102 
    103    switch (source->File) {
    104 
    105       /* Registers:
    106        */
    107    case PROGRAM_TEMPORARY:
    108       if (source->Index >= I915_MAX_TEMPORARY) {
    109          i915_program_error(p, "Exceeded max temporary reg: %d/%d",
    110 			    source->Index, I915_MAX_TEMPORARY);
    111          return 0;
    112       }
    113       src = UREG(REG_TYPE_R, source->Index);
    114       break;
    115    case PROGRAM_INPUT:
    116       switch (source->Index) {
    117       case VARYING_SLOT_POS:
    118          src = i915_emit_decl(p, REG_TYPE_T, p->wpos_tex, D0_CHANNEL_ALL);
    119          break;
    120       case VARYING_SLOT_COL0:
    121          src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
    122          break;
    123       case VARYING_SLOT_COL1:
    124          src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
    125          src = swizzle(src, X, Y, Z, ONE);
    126          break;
    127       case VARYING_SLOT_FOGC:
    128          src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
    129          src = swizzle(src, W, ZERO, ZERO, ONE);
    130          break;
    131       case VARYING_SLOT_TEX0:
    132       case VARYING_SLOT_TEX1:
    133       case VARYING_SLOT_TEX2:
    134       case VARYING_SLOT_TEX3:
    135       case VARYING_SLOT_TEX4:
    136       case VARYING_SLOT_TEX5:
    137       case VARYING_SLOT_TEX6:
    138       case VARYING_SLOT_TEX7:
    139          unit = get_texcoord_mapping(p, (source->Index -
    140                                          VARYING_SLOT_TEX0) | TEXCOORD_TEX);
    141          src = i915_emit_decl(p, REG_TYPE_T,
    142                               T_TEX0 + unit,
    143                               D0_CHANNEL_ALL);
    144 	 break;
    145 
    146       case VARYING_SLOT_VAR0:
    147       case VARYING_SLOT_VAR0 + 1:
    148       case VARYING_SLOT_VAR0 + 2:
    149       case VARYING_SLOT_VAR0 + 3:
    150       case VARYING_SLOT_VAR0 + 4:
    151       case VARYING_SLOT_VAR0 + 5:
    152       case VARYING_SLOT_VAR0 + 6:
    153       case VARYING_SLOT_VAR0 + 7:
    154          unit = get_texcoord_mapping(p, (source->Index -
    155                                          VARYING_SLOT_VAR0) | TEXCOORD_VAR);
    156          src = i915_emit_decl(p, REG_TYPE_T,
    157                               T_TEX0 + unit,
    158                               D0_CHANNEL_ALL);
    159          break;
    160 
    161       default:
    162          i915_program_error(p, "Bad source->Index: %d", source->Index);
    163          return 0;
    164       }
    165       break;
    166 
    167    case PROGRAM_OUTPUT:
    168       switch (source->Index) {
    169       case FRAG_RESULT_COLOR:
    170       case FRAG_RESULT_DATA0:
    171 	 src = UREG(REG_TYPE_OC, 0);
    172 	 break;
    173       case FRAG_RESULT_DEPTH:
    174 	 src = UREG(REG_TYPE_OD, 0);
    175 	 break;
    176       default:
    177 	 i915_program_error(p, "Bad source->Index: %d", source->Index);
    178 	 return 0;
    179       }
    180       break;
    181 
    182       /* Various paramters and env values.  All emitted to
    183        * hardware as program constants.
    184        */
    185    case PROGRAM_CONSTANT:
    186    case PROGRAM_STATE_VAR:
    187    case PROGRAM_UNIFORM:
    188       src = i915_emit_param4fv(p,
    189 	 &program->Parameters->ParameterValues[source->Index][0].f);
    190       break;
    191 
    192    default:
    193       i915_program_error(p, "Bad source->File: %d", source->File);
    194       return 0;
    195    }
    196 
    197    src = swizzle(src,
    198                  GET_SWZ(source->Swizzle, 0),
    199                  GET_SWZ(source->Swizzle, 1),
    200                  GET_SWZ(source->Swizzle, 2), GET_SWZ(source->Swizzle, 3));
    201 
    202    if (source->Negate)
    203       src = negate(src,
    204                    GET_BIT(source->Negate, 0),
    205                    GET_BIT(source->Negate, 1),
    206                    GET_BIT(source->Negate, 2),
    207                    GET_BIT(source->Negate, 3));
    208 
    209    return src;
    210 }
    211 
    212 
    213 static GLuint
    214 get_result_vector(struct i915_fragment_program *p,
    215                   const struct prog_instruction *inst)
    216 {
    217    switch (inst->DstReg.File) {
    218    case PROGRAM_OUTPUT:
    219       switch (inst->DstReg.Index) {
    220       case FRAG_RESULT_COLOR:
    221       case FRAG_RESULT_DATA0:
    222          return UREG(REG_TYPE_OC, 0);
    223       case FRAG_RESULT_DEPTH:
    224          p->depth_written = 1;
    225          return UREG(REG_TYPE_OD, 0);
    226       default:
    227          i915_program_error(p, "Bad inst->DstReg.Index: %d",
    228 			    inst->DstReg.Index);
    229          return 0;
    230       }
    231    case PROGRAM_TEMPORARY:
    232       return UREG(REG_TYPE_R, inst->DstReg.Index);
    233    default:
    234       i915_program_error(p, "Bad inst->DstReg.File: %d", inst->DstReg.File);
    235       return 0;
    236    }
    237 }
    238 
    239 static GLuint
    240 get_result_flags(const struct prog_instruction *inst)
    241 {
    242    GLuint flags = 0;
    243 
    244    if (inst->Saturate)
    245       flags |= A0_DEST_SATURATE;
    246    if (inst->DstReg.WriteMask & WRITEMASK_X)
    247       flags |= A0_DEST_CHANNEL_X;
    248    if (inst->DstReg.WriteMask & WRITEMASK_Y)
    249       flags |= A0_DEST_CHANNEL_Y;
    250    if (inst->DstReg.WriteMask & WRITEMASK_Z)
    251       flags |= A0_DEST_CHANNEL_Z;
    252    if (inst->DstReg.WriteMask & WRITEMASK_W)
    253       flags |= A0_DEST_CHANNEL_W;
    254 
    255    return flags;
    256 }
    257 
    258 static GLuint
    259 translate_tex_src_target(struct i915_fragment_program *p, GLubyte bit)
    260 {
    261    switch (bit) {
    262    case TEXTURE_1D_INDEX:
    263       return D0_SAMPLE_TYPE_2D;
    264    case TEXTURE_2D_INDEX:
    265       return D0_SAMPLE_TYPE_2D;
    266    case TEXTURE_RECT_INDEX:
    267       return D0_SAMPLE_TYPE_2D;
    268    case TEXTURE_3D_INDEX:
    269       return D0_SAMPLE_TYPE_VOLUME;
    270    case TEXTURE_CUBE_INDEX:
    271       return D0_SAMPLE_TYPE_CUBE;
    272    default:
    273       i915_program_error(p, "TexSrcBit: %d", bit);
    274       return 0;
    275    }
    276 }
    277 
    278 #define EMIT_TEX( OP )						\
    279 do {								\
    280    GLuint dim = translate_tex_src_target( p, inst->TexSrcTarget );	\
    281    const struct gl_program *program = &p->FragProg;	\
    282    GLuint unit = program->SamplerUnits[inst->TexSrcUnit];	\
    283    GLuint sampler = i915_emit_decl(p, REG_TYPE_S,		\
    284 				   unit, dim);			\
    285    GLuint coord = src_vector( p, &inst->SrcReg[0], program);	\
    286    /* Texel lookup */						\
    287 								\
    288    i915_emit_texld( p, get_live_regs(p, inst),						\
    289 	       get_result_vector( p, inst ),			\
    290 	       get_result_flags( inst ),			\
    291 	       sampler,						\
    292 	       coord,						\
    293 	       OP);						\
    294 } while (0)
    295 
    296 #define EMIT_ARITH( OP, N )						\
    297 do {									\
    298    i915_emit_arith( p,							\
    299 	       OP,							\
    300 	       get_result_vector( p, inst ), 				\
    301 	       get_result_flags( inst ), 0,			\
    302 	       (N<1)?0:src_vector( p, &inst->SrcReg[0], program),	\
    303 	       (N<2)?0:src_vector( p, &inst->SrcReg[1], program),	\
    304 	       (N<3)?0:src_vector( p, &inst->SrcReg[2], program));	\
    305 } while (0)
    306 
    307 #define EMIT_1ARG_ARITH( OP ) EMIT_ARITH( OP, 1 )
    308 #define EMIT_2ARG_ARITH( OP ) EMIT_ARITH( OP, 2 )
    309 #define EMIT_3ARG_ARITH( OP ) EMIT_ARITH( OP, 3 )
    310 
    311 /*
    312  * TODO: consider moving this into core
    313  */
    314 static bool calc_live_regs( struct i915_fragment_program *p )
    315 {
    316     const struct gl_program *program = &p->FragProg;
    317     GLuint regsUsed = ~((1 << I915_MAX_TEMPORARY) - 1);
    318     uint8_t live_components[I915_MAX_TEMPORARY] = { 0, };
    319     GLint i;
    320 
    321     for (i = program->arb.NumInstructions - 1; i >= 0; i--) {
    322         struct prog_instruction *inst = &program->arb.Instructions[i];
    323         int opArgs = _mesa_num_inst_src_regs(inst->Opcode);
    324         int a;
    325 
    326         /* Register is written to: unmark as live for this and preceeding ops */
    327         if (inst->DstReg.File == PROGRAM_TEMPORARY) {
    328 	    if (inst->DstReg.Index >= I915_MAX_TEMPORARY)
    329 	       return false;
    330 
    331             live_components[inst->DstReg.Index] &= ~inst->DstReg.WriteMask;
    332             if (live_components[inst->DstReg.Index] == 0)
    333                 regsUsed &= ~(1 << inst->DstReg.Index);
    334         }
    335 
    336         for (a = 0; a < opArgs; a++) {
    337             /* Register is read from: mark as live for this and preceeding ops */
    338             if (inst->SrcReg[a].File == PROGRAM_TEMPORARY) {
    339                 unsigned c;
    340 
    341 		if (inst->SrcReg[a].Index >= I915_MAX_TEMPORARY)
    342 		   return false;
    343 
    344                 regsUsed |= 1 << inst->SrcReg[a].Index;
    345 
    346                 for (c = 0; c < 4; c++) {
    347                     const unsigned field = GET_SWZ(inst->SrcReg[a].Swizzle, c);
    348 
    349                     if (field <= SWIZZLE_W)
    350                         live_components[inst->SrcReg[a].Index] |= (1U << field);
    351                 }
    352             }
    353         }
    354 
    355         p->usedRegs[i] = regsUsed;
    356     }
    357 
    358     return true;
    359 }
    360 
    361 static GLuint get_live_regs( struct i915_fragment_program *p,
    362                              const struct prog_instruction *inst )
    363 {
    364     const struct gl_program *program = &p->FragProg;
    365     GLuint nr = inst - program->arb.Instructions;
    366 
    367     return p->usedRegs[nr];
    368 }
    369 
    370 
    371 /* Possible concerns:
    372  *
    373  * SIN, COS -- could use another taylor step?
    374  * LIT      -- results seem a little different to sw mesa
    375  * LOG      -- different to mesa on negative numbers, but this is conformant.
    376  *
    377  * Parse failures -- Mesa doesn't currently give a good indication
    378  * internally whether a particular program string parsed or not.  This
    379  * can lead to confusion -- hopefully we cope with it ok now.
    380  *
    381  */
    382 static void
    383 upload_program(struct i915_fragment_program *p)
    384 {
    385    const struct gl_program *program = &p->FragProg;
    386    const struct prog_instruction *inst = program->arb.Instructions;
    387 
    388    if (INTEL_DEBUG & DEBUG_WM)
    389       _mesa_print_program(program);
    390 
    391    /* Is this a parse-failed program?  Ensure a valid program is
    392     * loaded, as the flagging of an error isn't sufficient to stop
    393     * this being uploaded to hardware.
    394     */
    395    if (inst[0].Opcode == OPCODE_END) {
    396       GLuint tmp = i915_get_utemp(p);
    397       i915_emit_arith(p,
    398                       A0_MOV,
    399                       UREG(REG_TYPE_OC, 0),
    400                       A0_DEST_CHANNEL_ALL, 0,
    401                       swizzle(tmp, ONE, ZERO, ONE, ONE), 0, 0);
    402       return;
    403    }
    404 
    405    if (program->arb.NumInstructions > I915_MAX_INSN) {
    406       i915_program_error(p, "Exceeded max instructions (%d out of %d)",
    407                          program->arb.NumInstructions, I915_MAX_INSN);
    408       return;
    409    }
    410 
    411    /* Not always needed:
    412     */
    413    if (!calc_live_regs(p)) {
    414       i915_program_error(p, "Could not allocate registers");
    415       return;
    416    }
    417 
    418    while (1) {
    419       GLuint src0, src1, src2, flags;
    420       GLuint tmp = 0, dst, consts0 = 0, consts1 = 0;
    421 
    422       switch (inst->Opcode) {
    423       case OPCODE_ABS:
    424          src0 = src_vector(p, &inst->SrcReg[0], program);
    425          i915_emit_arith(p,
    426                          A0_MAX,
    427                          get_result_vector(p, inst),
    428                          get_result_flags(inst), 0,
    429                          src0, negate(src0, 1, 1, 1, 1), 0);
    430          break;
    431 
    432       case OPCODE_ADD:
    433          EMIT_2ARG_ARITH(A0_ADD);
    434          break;
    435 
    436       case OPCODE_CMP:
    437          src0 = src_vector(p, &inst->SrcReg[0], program);
    438          src1 = src_vector(p, &inst->SrcReg[1], program);
    439          src2 = src_vector(p, &inst->SrcReg[2], program);
    440          i915_emit_arith(p, A0_CMP, get_result_vector(p, inst), get_result_flags(inst), 0, src0, src2, src1);   /* NOTE: order of src2, src1 */
    441          break;
    442 
    443       case OPCODE_COS:
    444          src0 = src_vector(p, &inst->SrcReg[0], program);
    445          tmp = i915_get_utemp(p);
    446 	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
    447 	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
    448 
    449 	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
    450          i915_emit_arith(p,
    451                          A0_MAD,
    452                          tmp, A0_DEST_CHANNEL_X, 0,
    453                          src0,
    454 			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
    455 			 swizzle(consts0, W, ZERO, ZERO, ZERO)); /* .75 */
    456 
    457          i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
    458 
    459 	 i915_emit_arith(p,
    460 			 A0_MAD,
    461 			 tmp, A0_DEST_CHANNEL_X, 0,
    462 			 tmp,
    463 			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
    464 			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
    465 
    466 	 /* Compute COS with the same calculation used for SIN, but a
    467 	  * different source range has been mapped to [-1,1] this time.
    468 	  */
    469 
    470 	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
    471 	 i915_emit_arith(p,
    472                          A0_MAX,
    473 			 tmp, A0_DEST_CHANNEL_Y, 0,
    474 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    475 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
    476 			 0);
    477 
    478 	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
    479 	 i915_emit_arith(p,
    480 			 A0_MUL,
    481 			 tmp, A0_DEST_CHANNEL_Y, 0,
    482 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    483 			 tmp,
    484 			 0);
    485 
    486 	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
    487          i915_emit_arith(p,
    488                          A0_DP3,
    489                          tmp, A0_DEST_CHANNEL_X, 0,
    490 			 tmp,
    491                          swizzle(consts1, X, Y, ZERO, ZERO),
    492 			 0);
    493 
    494 	 /* tmp.x now contains a first approximation (y).  Now, weight it
    495 	  * against tmp.y**2 to get closer.
    496 	  */
    497 	 i915_emit_arith(p,
    498                          A0_MAX,
    499 			 tmp, A0_DEST_CHANNEL_Y, 0,
    500 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    501 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
    502 			 0);
    503 
    504 	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
    505 	 i915_emit_arith(p,
    506 			 A0_MAD,
    507 			 tmp, A0_DEST_CHANNEL_Y, 0,
    508 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    509 			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
    510 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
    511 
    512 	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
    513 	 i915_emit_arith(p,
    514 			 A0_MAD,
    515                          get_result_vector(p, inst),
    516                          get_result_flags(inst), 0,
    517 			 swizzle(consts1, W, W, W, W),
    518 			 swizzle(tmp, Y, Y, Y, Y),
    519 			 swizzle(tmp, X, X, X, X));
    520          break;
    521 
    522       case OPCODE_DP2:
    523          src0 = src_vector(p, &inst->SrcReg[0], program);
    524          src1 = src_vector(p, &inst->SrcReg[1], program);
    525 	 i915_emit_arith(p,
    526 			 A0_DP3,
    527                          get_result_vector(p, inst),
    528                          get_result_flags(inst), 0,
    529 			 swizzle(src0, X, Y, ZERO, ZERO),
    530 			 swizzle(src1, X, Y, ZERO, ZERO),
    531 			 0);
    532          break;
    533 
    534       case OPCODE_DP3:
    535          EMIT_2ARG_ARITH(A0_DP3);
    536          break;
    537 
    538       case OPCODE_DP4:
    539          EMIT_2ARG_ARITH(A0_DP4);
    540          break;
    541 
    542       case OPCODE_DPH:
    543          src0 = src_vector(p, &inst->SrcReg[0], program);
    544          src1 = src_vector(p, &inst->SrcReg[1], program);
    545 
    546          i915_emit_arith(p,
    547                          A0_DP4,
    548                          get_result_vector(p, inst),
    549                          get_result_flags(inst), 0,
    550                          swizzle(src0, X, Y, Z, ONE), src1, 0);
    551          break;
    552 
    553       case OPCODE_DST:
    554          src0 = src_vector(p, &inst->SrcReg[0], program);
    555          src1 = src_vector(p, &inst->SrcReg[1], program);
    556 
    557          /* result[0] = 1    * 1;
    558           * result[1] = a[1] * b[1];
    559           * result[2] = a[2] * 1;
    560           * result[3] = 1    * b[3];
    561           */
    562          i915_emit_arith(p,
    563                          A0_MUL,
    564                          get_result_vector(p, inst),
    565                          get_result_flags(inst), 0,
    566                          swizzle(src0, ONE, Y, Z, ONE),
    567                          swizzle(src1, ONE, Y, ONE, W), 0);
    568          break;
    569 
    570       case OPCODE_EX2:
    571          src0 = src_vector(p, &inst->SrcReg[0], program);
    572 
    573          i915_emit_arith(p,
    574                          A0_EXP,
    575                          get_result_vector(p, inst),
    576                          get_result_flags(inst), 0,
    577                          swizzle(src0, X, X, X, X), 0, 0);
    578          break;
    579 
    580       case OPCODE_FLR:
    581          EMIT_1ARG_ARITH(A0_FLR);
    582          break;
    583 
    584       case OPCODE_TRUNC:
    585 	 EMIT_1ARG_ARITH(A0_TRC);
    586 	 break;
    587 
    588       case OPCODE_FRC:
    589          EMIT_1ARG_ARITH(A0_FRC);
    590          break;
    591 
    592       case OPCODE_KIL:
    593          src0 = src_vector(p, &inst->SrcReg[0], program);
    594          tmp = i915_get_utemp(p);
    595 
    596          i915_emit_texld(p, get_live_regs(p, inst),
    597                          tmp, A0_DEST_CHANNEL_ALL,   /* use a dummy dest reg */
    598                          0, src0, T0_TEXKILL);
    599          break;
    600 
    601       case OPCODE_LG2:
    602          src0 = src_vector(p, &inst->SrcReg[0], program);
    603 
    604          i915_emit_arith(p,
    605                          A0_LOG,
    606                          get_result_vector(p, inst),
    607                          get_result_flags(inst), 0,
    608                          swizzle(src0, X, X, X, X), 0, 0);
    609          break;
    610 
    611       case OPCODE_LIT:
    612          src0 = src_vector(p, &inst->SrcReg[0], program);
    613          tmp = i915_get_utemp(p);
    614 
    615          /* tmp = max( a.xyzw, a.00zw )
    616           * XXX: Clamp tmp.w to -128..128
    617           * tmp.y = log(tmp.y)
    618           * tmp.y = tmp.w * tmp.y
    619           * tmp.y = exp(tmp.y)
    620           * result = cmp (a.11-x1, a.1x01, a.1xy1 )
    621           */
    622          i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
    623                          src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
    624 
    625          i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
    626                          swizzle(tmp, Y, Y, Y, Y), 0, 0);
    627 
    628          i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
    629                          swizzle(tmp, ZERO, Y, ZERO, ZERO),
    630                          swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
    631 
    632          i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
    633                          swizzle(tmp, Y, Y, Y, Y), 0, 0);
    634 
    635          i915_emit_arith(p, A0_CMP,
    636                          get_result_vector(p, inst),
    637                          get_result_flags(inst), 0,
    638                          negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
    639                          swizzle(tmp, ONE, X, ZERO, ONE),
    640                          swizzle(tmp, ONE, X, Y, ONE));
    641 
    642          break;
    643 
    644       case OPCODE_LRP:
    645          src0 = src_vector(p, &inst->SrcReg[0], program);
    646          src1 = src_vector(p, &inst->SrcReg[1], program);
    647          src2 = src_vector(p, &inst->SrcReg[2], program);
    648          flags = get_result_flags(inst);
    649          tmp = i915_get_utemp(p);
    650 
    651          /* b*a + c*(1-a)
    652           *
    653           * b*a + c - ca
    654           *
    655           * tmp = b*a + c,
    656           * result = (-c)*a + tmp
    657           */
    658          i915_emit_arith(p, A0_MAD, tmp,
    659                          flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
    660 
    661          i915_emit_arith(p, A0_MAD,
    662                          get_result_vector(p, inst),
    663                          flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
    664          break;
    665 
    666       case OPCODE_MAD:
    667          EMIT_3ARG_ARITH(A0_MAD);
    668          break;
    669 
    670       case OPCODE_MAX:
    671          EMIT_2ARG_ARITH(A0_MAX);
    672          break;
    673 
    674       case OPCODE_MIN:
    675          EMIT_2ARG_ARITH(A0_MIN);
    676          break;
    677 
    678       case OPCODE_MOV:
    679          EMIT_1ARG_ARITH(A0_MOV);
    680          break;
    681 
    682       case OPCODE_MUL:
    683          EMIT_2ARG_ARITH(A0_MUL);
    684          break;
    685 
    686       case OPCODE_POW:
    687          src0 = src_vector(p, &inst->SrcReg[0], program);
    688          src1 = src_vector(p, &inst->SrcReg[1], program);
    689          tmp = i915_get_utemp(p);
    690          flags = get_result_flags(inst);
    691 
    692          /* XXX: masking on intermediate values, here and elsewhere.
    693           */
    694          i915_emit_arith(p,
    695                          A0_LOG,
    696                          tmp, A0_DEST_CHANNEL_X, 0,
    697                          swizzle(src0, X, X, X, X), 0, 0);
    698 
    699          i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
    700 
    701 
    702          i915_emit_arith(p,
    703                          A0_EXP,
    704                          get_result_vector(p, inst),
    705                          flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
    706 
    707          break;
    708 
    709       case OPCODE_RCP:
    710          src0 = src_vector(p, &inst->SrcReg[0], program);
    711 
    712          i915_emit_arith(p,
    713                          A0_RCP,
    714                          get_result_vector(p, inst),
    715                          get_result_flags(inst), 0,
    716                          swizzle(src0, X, X, X, X), 0, 0);
    717          break;
    718 
    719       case OPCODE_RSQ:
    720 
    721          src0 = src_vector(p, &inst->SrcReg[0], program);
    722 
    723          i915_emit_arith(p,
    724                          A0_RSQ,
    725                          get_result_vector(p, inst),
    726                          get_result_flags(inst), 0,
    727                          swizzle(src0, X, X, X, X), 0, 0);
    728          break;
    729 
    730       case OPCODE_SCS:
    731          src0 = src_vector(p, &inst->SrcReg[0], program);
    732          tmp = i915_get_utemp(p);
    733 
    734          /*
    735           * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
    736           * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
    737           * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
    738           * scs.x = DP4 t1, sin_constants
    739           * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
    740           * scs.y = DP4 t1, cos_constants
    741           */
    742          i915_emit_arith(p,
    743                          A0_MUL,
    744                          tmp, A0_DEST_CHANNEL_XY, 0,
    745                          swizzle(src0, X, X, ONE, ONE),
    746                          swizzle(src0, X, ONE, ONE, ONE), 0);
    747 
    748          i915_emit_arith(p,
    749                          A0_MUL,
    750                          tmp, A0_DEST_CHANNEL_ALL, 0,
    751                          swizzle(tmp, X, Y, X, Y),
    752                          swizzle(tmp, X, X, ONE, ONE), 0);
    753 
    754          if (inst->DstReg.WriteMask & WRITEMASK_Y) {
    755             GLuint tmp1;
    756 
    757             if (inst->DstReg.WriteMask & WRITEMASK_X)
    758                tmp1 = i915_get_utemp(p);
    759             else
    760                tmp1 = tmp;
    761 
    762             i915_emit_arith(p,
    763                             A0_MUL,
    764                             tmp1, A0_DEST_CHANNEL_ALL, 0,
    765                             swizzle(tmp, X, Y, Y, W),
    766                             swizzle(tmp, X, Z, ONE, ONE), 0);
    767 
    768             i915_emit_arith(p,
    769                             A0_DP4,
    770                             get_result_vector(p, inst),
    771                             A0_DEST_CHANNEL_Y, 0,
    772                             swizzle(tmp1, W, Z, Y, X),
    773                             i915_emit_const4fv(p, sin_constants), 0);
    774          }
    775 
    776          if (inst->DstReg.WriteMask & WRITEMASK_X) {
    777             i915_emit_arith(p,
    778                             A0_MUL,
    779                             tmp, A0_DEST_CHANNEL_XYZ, 0,
    780                             swizzle(tmp, X, X, Z, ONE),
    781                             swizzle(tmp, Z, ONE, ONE, ONE), 0);
    782 
    783             i915_emit_arith(p,
    784                             A0_DP4,
    785                             get_result_vector(p, inst),
    786                             A0_DEST_CHANNEL_X, 0,
    787                             swizzle(tmp, ONE, Z, Y, X),
    788                             i915_emit_const4fv(p, cos_constants), 0);
    789          }
    790          break;
    791 
    792       case OPCODE_SIN:
    793          src0 = src_vector(p, &inst->SrcReg[0], program);
    794          tmp = i915_get_utemp(p);
    795 	 consts0 = i915_emit_const4fv(p, sin_quad_constants[0]);
    796 	 consts1 = i915_emit_const4fv(p, sin_quad_constants[1]);
    797 
    798 	 /* Reduce range from repeating about [-pi,pi] to [-1,1] */
    799          i915_emit_arith(p,
    800                          A0_MAD,
    801                          tmp, A0_DEST_CHANNEL_X, 0,
    802                          src0,
    803 			 swizzle(consts1, Z, ZERO, ZERO, ZERO), /* 1/(2pi) */
    804 			 swizzle(consts0, Z, ZERO, ZERO, ZERO)); /* .5 */
    805 
    806          i915_emit_arith(p, A0_FRC, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
    807 
    808 	 i915_emit_arith(p,
    809 			 A0_MAD,
    810 			 tmp, A0_DEST_CHANNEL_X, 0,
    811 			 tmp,
    812 			 swizzle(consts0, X, ZERO, ZERO, ZERO), /* 2 */
    813 			 swizzle(consts0, Y, ZERO, ZERO, ZERO)); /* -1 */
    814 
    815 	 /* Compute sin using a quadratic and quartic.  It gives continuity
    816 	  * that repeating the Taylor series lacks every 2*pi, and has
    817 	  * reduced error.
    818 	  *
    819 	  * The idea was described at:
    820 	  * http://www.devmaster.net/forums/showthread.php?t=5784
    821 	  */
    822 
    823 	 /* tmp.y = abs(tmp.x); {x, abs(x), 0, 0} */
    824 	 i915_emit_arith(p,
    825                          A0_MAX,
    826 			 tmp, A0_DEST_CHANNEL_Y, 0,
    827 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    828 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
    829 			 0);
    830 
    831 	 /* tmp.y = tmp.y * tmp.x; {x, x * abs(x), 0, 0} */
    832 	 i915_emit_arith(p,
    833 			 A0_MUL,
    834 			 tmp, A0_DEST_CHANNEL_Y, 0,
    835 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    836 			 tmp,
    837 			 0);
    838 
    839 	 /* tmp.x = tmp.xy DP sin_quad_constants[2].xy */
    840          i915_emit_arith(p,
    841                          A0_DP3,
    842                          tmp, A0_DEST_CHANNEL_X, 0,
    843 			 tmp,
    844                          swizzle(consts1, X, Y, ZERO, ZERO),
    845 			 0);
    846 
    847 	 /* tmp.x now contains a first approximation (y).  Now, weight it
    848 	  * against tmp.y**2 to get closer.
    849 	  */
    850 	 i915_emit_arith(p,
    851                          A0_MAX,
    852 			 tmp, A0_DEST_CHANNEL_Y, 0,
    853 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    854 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0),
    855 			 0);
    856 
    857 	 /* tmp.y = tmp.x * tmp.y - tmp.x; {y, y * abs(y) - y, 0, 0} */
    858 	 i915_emit_arith(p,
    859 			 A0_MAD,
    860 			 tmp, A0_DEST_CHANNEL_Y, 0,
    861 			 swizzle(tmp, ZERO, X, ZERO, ZERO),
    862 			 swizzle(tmp, ZERO, Y, ZERO, ZERO),
    863 			 negate(swizzle(tmp, ZERO, X, ZERO, ZERO), 0, 1, 0, 0));
    864 
    865 	 /* result = .2225 * tmp.y + tmp.x =.2225(y * abs(y) - y) + y= */
    866 	 i915_emit_arith(p,
    867 			 A0_MAD,
    868                          get_result_vector(p, inst),
    869                          get_result_flags(inst), 0,
    870 			 swizzle(consts1, W, W, W, W),
    871 			 swizzle(tmp, Y, Y, Y, Y),
    872 			 swizzle(tmp, X, X, X, X));
    873 
    874          break;
    875 
    876       case OPCODE_SGE:
    877 	 EMIT_2ARG_ARITH(A0_SGE);
    878 	 break;
    879 
    880       case OPCODE_SLT:
    881          EMIT_2ARG_ARITH(A0_SLT);
    882          break;
    883 
    884       case OPCODE_SSG:
    885 	 dst = get_result_vector(p, inst);
    886 	 flags = get_result_flags(inst);
    887          src0 = src_vector(p, &inst->SrcReg[0], program);
    888 	 tmp = i915_get_utemp(p);
    889 
    890 	 /* tmp = (src < 0.0) */
    891 	 i915_emit_arith(p,
    892 			 A0_SLT,
    893 			 tmp,
    894 			 flags, 0,
    895 			 src0,
    896 			 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
    897 			 0);
    898 
    899 	 /* dst = (0.0 < src) */
    900 	 i915_emit_arith(p,
    901 			 A0_SLT,
    902 			 dst,
    903 			 flags, 0,
    904 			 swizzle(src0, ZERO, ZERO, ZERO, ZERO),
    905 			 src0,
    906 			 0);
    907 
    908 	 /* dst = (src > 0.0) - (src < 0.0) */
    909 	 i915_emit_arith(p,
    910 			 A0_ADD,
    911 			 dst,
    912 			 flags, 0,
    913 			 dst,
    914 			 negate(tmp, 1, 1, 1, 1),
    915 			 0);
    916 
    917          break;
    918 
    919       case OPCODE_SUB:
    920          src0 = src_vector(p, &inst->SrcReg[0], program);
    921          src1 = src_vector(p, &inst->SrcReg[1], program);
    922 
    923          i915_emit_arith(p,
    924                          A0_ADD,
    925                          get_result_vector(p, inst),
    926                          get_result_flags(inst), 0,
    927                          src0, negate(src1, 1, 1, 1, 1), 0);
    928          break;
    929 
    930       case OPCODE_SWZ:
    931          EMIT_1ARG_ARITH(A0_MOV);       /* extended swizzle handled natively */
    932          break;
    933 
    934       case OPCODE_TEX:
    935          EMIT_TEX(T0_TEXLD);
    936          break;
    937 
    938       case OPCODE_TXB:
    939          EMIT_TEX(T0_TEXLDB);
    940          break;
    941 
    942       case OPCODE_TXP:
    943          EMIT_TEX(T0_TEXLDP);
    944          break;
    945 
    946       case OPCODE_XPD:
    947          /* Cross product:
    948           *      result.x = src0.y * src1.z - src0.z * src1.y;
    949           *      result.y = src0.z * src1.x - src0.x * src1.z;
    950           *      result.z = src0.x * src1.y - src0.y * src1.x;
    951           *      result.w = undef;
    952           */
    953          src0 = src_vector(p, &inst->SrcReg[0], program);
    954          src1 = src_vector(p, &inst->SrcReg[1], program);
    955          tmp = i915_get_utemp(p);
    956 
    957          i915_emit_arith(p,
    958                          A0_MUL,
    959                          tmp, A0_DEST_CHANNEL_ALL, 0,
    960                          swizzle(src0, Z, X, Y, ONE),
    961                          swizzle(src1, Y, Z, X, ONE), 0);
    962 
    963          i915_emit_arith(p,
    964                          A0_MAD,
    965                          get_result_vector(p, inst),
    966                          get_result_flags(inst), 0,
    967                          swizzle(src0, Y, Z, X, ONE),
    968                          swizzle(src1, Z, X, Y, ONE),
    969                          negate(tmp, 1, 1, 1, 0));
    970          break;
    971 
    972       case OPCODE_END:
    973          return;
    974 
    975       case OPCODE_BGNLOOP:
    976       case OPCODE_BGNSUB:
    977       case OPCODE_BRK:
    978       case OPCODE_CAL:
    979       case OPCODE_CONT:
    980       case OPCODE_DDX:
    981       case OPCODE_DDY:
    982       case OPCODE_ELSE:
    983       case OPCODE_ENDIF:
    984       case OPCODE_ENDLOOP:
    985       case OPCODE_ENDSUB:
    986       case OPCODE_IF:
    987       case OPCODE_RET:
    988 	 p->error = 1;
    989 	 i915_program_error(p, "Unsupported opcode: %s",
    990 			    _mesa_opcode_string(inst->Opcode));
    991 	 return;
    992 
    993       case OPCODE_EXP:
    994       case OPCODE_LOG:
    995 	 /* These opcodes are claimed as GLSL, NV_vp, and ARB_vp in
    996 	  * prog_instruction.h, but apparently GLSL doesn't ever emit them.
    997 	  * Instead, it translates to EX2 or LG2.
    998 	  */
    999       case OPCODE_TXD:
   1000       case OPCODE_TXL:
   1001 	 /* These opcodes are claimed by GLSL in prog_instruction.h, but
   1002 	  * only NV_vp/fp appears to emit them.
   1003 	  */
   1004       default:
   1005          i915_program_error(p, "bad opcode: %s",
   1006 			    _mesa_opcode_string(inst->Opcode));
   1007          return;
   1008       }
   1009 
   1010       inst++;
   1011       i915_release_utemps(p);
   1012    }
   1013 }
   1014 
   1015 /* Rather than trying to intercept and jiggle depth writes during
   1016  * emit, just move the value into its correct position at the end of
   1017  * the program:
   1018  */
   1019 static void
   1020 fixup_depth_write(struct i915_fragment_program *p)
   1021 {
   1022    if (p->depth_written) {
   1023       GLuint depth = UREG(REG_TYPE_OD, 0);
   1024 
   1025       i915_emit_arith(p,
   1026                       A0_MOV,
   1027                       depth, A0_DEST_CHANNEL_W, 0,
   1028                       swizzle(depth, X, Y, Z, Z), 0, 0);
   1029    }
   1030 }
   1031 
   1032 static void
   1033 check_texcoord_mapping(struct i915_fragment_program *p)
   1034 {
   1035    GLbitfield64 inputs = p->FragProg.info.inputs_read;
   1036    unsigned unit = 0;
   1037 
   1038    for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
   1039       if (inputs & VARYING_BIT_TEX(i)) {
   1040          if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
   1041             unit++;
   1042             break;
   1043          }
   1044          p->texcoord_mapping[unit++] = i | TEXCOORD_TEX;
   1045       }
   1046       if (inputs & VARYING_BIT_VAR(i)) {
   1047          if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
   1048             unit++;
   1049             break;
   1050          }
   1051          p->texcoord_mapping[unit++] = i | TEXCOORD_VAR;
   1052       }
   1053    }
   1054 
   1055    if (unit > p->ctx->Const.MaxTextureCoordUnits)
   1056       i915_program_error(p, "Too many texcoord units");
   1057 }
   1058 
   1059 static void
   1060 check_wpos(struct i915_fragment_program *p)
   1061 {
   1062    GLbitfield64 inputs = p->FragProg.info.inputs_read;
   1063    GLint i;
   1064    unsigned unit = 0;
   1065 
   1066    p->wpos_tex = -1;
   1067 
   1068    if ((inputs & VARYING_BIT_POS) == 0)
   1069       return;
   1070 
   1071    for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
   1072       unit += !!(inputs & VARYING_BIT_TEX(i));
   1073       unit += !!(inputs & VARYING_BIT_VAR(i));
   1074    }
   1075 
   1076    if (unit < p->ctx->Const.MaxTextureCoordUnits)
   1077       p->wpos_tex = unit;
   1078    else
   1079       i915_program_error(p, "No free texcoord for wpos value");
   1080 }
   1081 
   1082 
   1083 static void
   1084 translate_program(struct i915_fragment_program *p)
   1085 {
   1086    struct i915_context *i915 = I915_CONTEXT(p->ctx);
   1087 
   1088    if (INTEL_DEBUG & DEBUG_WM) {
   1089       printf("fp:\n");
   1090       _mesa_print_program(&p->FragProg);
   1091       printf("\n");
   1092    }
   1093 
   1094    i915_init_program(i915, p);
   1095    check_texcoord_mapping(p);
   1096    check_wpos(p);
   1097    upload_program(p);
   1098    fixup_depth_write(p);
   1099    i915_fini_program(p);
   1100 
   1101    p->translated = 1;
   1102 }
   1103 
   1104 
   1105 static void
   1106 track_params(struct i915_fragment_program *p)
   1107 {
   1108    GLint i;
   1109 
   1110    if (p->nr_params)
   1111       _mesa_load_state_parameters(p->ctx, p->FragProg.Parameters);
   1112 
   1113    for (i = 0; i < p->nr_params; i++) {
   1114       GLint reg = p->param[i].reg;
   1115       COPY_4V(p->constant[reg], p->param[i].values);
   1116    }
   1117 
   1118    p->params_uptodate = 1;
   1119    p->on_hardware = 0;          /* overkill */
   1120 }
   1121 
   1122 
   1123 static void
   1124 i915BindProgram(struct gl_context * ctx, GLenum target, struct gl_program *prog)
   1125 {
   1126    if (target == GL_FRAGMENT_PROGRAM_ARB) {
   1127       struct i915_context *i915 = I915_CONTEXT(ctx);
   1128       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
   1129 
   1130       if (i915->current_program == p)
   1131          return;
   1132 
   1133       if (i915->current_program) {
   1134          i915->current_program->on_hardware = 0;
   1135          i915->current_program->params_uptodate = 0;
   1136       }
   1137 
   1138       i915->current_program = p;
   1139 
   1140       assert(p->on_hardware == 0);
   1141       assert(p->params_uptodate == 0);
   1142 
   1143    }
   1144 }
   1145 
   1146 static struct gl_program *
   1147 i915NewProgram(struct gl_context * ctx, GLenum target, GLuint id,
   1148                bool is_arb_asm)
   1149 {
   1150    switch (target) {
   1151    case GL_VERTEX_PROGRAM_ARB: {
   1152       struct gl_program *prog = rzalloc(NULL, struct gl_program);
   1153       return _mesa_init_gl_program(prog, target, id, is_arb_asm);
   1154    }
   1155 
   1156    case GL_FRAGMENT_PROGRAM_ARB:{
   1157          struct i915_fragment_program *prog =
   1158             rzalloc(NULL, struct i915_fragment_program);
   1159          if (prog) {
   1160             i915_init_program(I915_CONTEXT(ctx), prog);
   1161 
   1162             return _mesa_init_gl_program(&prog->FragProg, target, id,
   1163                                          is_arb_asm);
   1164          }
   1165          else
   1166             return NULL;
   1167       }
   1168 
   1169    default:
   1170       /* Just fallback:
   1171        */
   1172       return _mesa_new_program(ctx, target, id, is_arb_asm);
   1173    }
   1174 }
   1175 
   1176 static void
   1177 i915DeleteProgram(struct gl_context * ctx, struct gl_program *prog)
   1178 {
   1179    if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
   1180       struct i915_context *i915 = I915_CONTEXT(ctx);
   1181       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
   1182 
   1183       if (i915->current_program == p)
   1184          i915->current_program = 0;
   1185    }
   1186 
   1187    _mesa_delete_program(ctx, prog);
   1188 }
   1189 
   1190 
   1191 static GLboolean
   1192 i915IsProgramNative(struct gl_context * ctx, GLenum target, struct gl_program *prog)
   1193 {
   1194    if (target == GL_FRAGMENT_PROGRAM_ARB) {
   1195       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
   1196 
   1197       if (!p->translated)
   1198          translate_program(p);
   1199 
   1200       return !p->error;
   1201    }
   1202    else
   1203       return true;
   1204 }
   1205 
   1206 static GLboolean
   1207 i915ProgramStringNotify(struct gl_context * ctx,
   1208                         GLenum target, struct gl_program *prog)
   1209 {
   1210    if (target == GL_FRAGMENT_PROGRAM_ARB) {
   1211       struct i915_fragment_program *p = (struct i915_fragment_program *) prog;
   1212       p->translated = 0;
   1213    }
   1214 
   1215    (void) _tnl_program_string(ctx, target, prog);
   1216 
   1217    /* XXX check if program is legal, within limits */
   1218    return true;
   1219 }
   1220 
   1221 static void
   1222 i915SamplerUniformChange(struct gl_context *ctx,
   1223                          GLenum target, struct gl_program *prog)
   1224 {
   1225    i915ProgramStringNotify(ctx, target, prog);
   1226 }
   1227 
   1228 void
   1229 i915_update_program(struct gl_context *ctx)
   1230 {
   1231    struct intel_context *intel = intel_context(ctx);
   1232    struct i915_context *i915 = i915_context(&intel->ctx);
   1233    struct i915_fragment_program *fp =
   1234       (struct i915_fragment_program *) ctx->FragmentProgram._Current;
   1235 
   1236    if (i915->current_program != fp) {
   1237       if (i915->current_program) {
   1238          i915->current_program->on_hardware = 0;
   1239          i915->current_program->params_uptodate = 0;
   1240       }
   1241 
   1242       i915->current_program = fp;
   1243    }
   1244 
   1245    if (!fp->translated)
   1246       translate_program(fp);
   1247 
   1248    FALLBACK(&i915->intel, I915_FALLBACK_PROGRAM, fp->error);
   1249 }
   1250 
   1251 void
   1252 i915ValidateFragmentProgram(struct i915_context *i915)
   1253 {
   1254    struct gl_context *ctx = &i915->intel.ctx;
   1255    struct intel_context *intel = intel_context(ctx);
   1256    TNLcontext *tnl = TNL_CONTEXT(ctx);
   1257    struct vertex_buffer *VB = &tnl->vb;
   1258 
   1259    struct i915_fragment_program *p =
   1260       (struct i915_fragment_program *) ctx->FragmentProgram._Current;
   1261 
   1262    const GLbitfield64 inputsRead = p->FragProg.info.inputs_read;
   1263    GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
   1264    GLuint s2 = S2_TEXCOORD_NONE;
   1265    int i, offset = 0;
   1266 
   1267    /* Important:
   1268     */
   1269    VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
   1270 
   1271    if (!p->translated)
   1272       translate_program(p);
   1273 
   1274    intel->vertex_attr_count = 0;
   1275    intel->wpos_offset = 0;
   1276    intel->coloroffset = 0;
   1277    intel->specoffset = 0;
   1278 
   1279    if (inputsRead & VARYING_BITS_TEX_ANY || p->wpos_tex != -1) {
   1280       EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
   1281    }
   1282    else {
   1283       EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
   1284    }
   1285 
   1286    /* Handle gl_PointSize builtin var here */
   1287    if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
   1288       EMIT_ATTR(_TNL_ATTRIB_POINTSIZE, EMIT_1F, S4_VFMT_POINT_WIDTH, 4);
   1289 
   1290    if (inputsRead & VARYING_BIT_COL0) {
   1291       intel->coloroffset = offset / 4;
   1292       EMIT_ATTR(_TNL_ATTRIB_COLOR0, EMIT_4UB_4F_BGRA, S4_VFMT_COLOR, 4);
   1293    }
   1294 
   1295    if (inputsRead & VARYING_BIT_COL1) {
   1296        intel->specoffset = offset / 4;
   1297        EMIT_ATTR(_TNL_ATTRIB_COLOR1, EMIT_4UB_4F_BGRA, S4_VFMT_SPEC_FOG, 4);
   1298    }
   1299 
   1300    if ((inputsRead & VARYING_BIT_FOGC)) {
   1301       EMIT_ATTR(_TNL_ATTRIB_FOG, EMIT_1F, S4_VFMT_FOG_PARAM, 4);
   1302    }
   1303 
   1304    for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
   1305       if (inputsRead & VARYING_BIT_TEX(i)) {
   1306          int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
   1307          int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
   1308 
   1309          s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
   1310          s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
   1311 
   1312          EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
   1313       }
   1314       if (inputsRead & VARYING_BIT_VAR(i)) {
   1315          int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
   1316          int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
   1317 
   1318          s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
   1319          s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
   1320 
   1321          EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
   1322       }
   1323       if (i == p->wpos_tex) {
   1324 	 int wpos_size = 4 * sizeof(float);
   1325          /* If WPOS is required, duplicate the XYZ position data in an
   1326           * unused texture coordinate:
   1327           */
   1328          s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
   1329          s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));
   1330 
   1331          intel->wpos_offset = offset;
   1332          EMIT_PAD(wpos_size);
   1333       }
   1334    }
   1335 
   1336    if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
   1337        s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
   1338       I915_STATECHANGE(i915, I915_UPLOAD_CTX);
   1339 
   1340       /* Must do this *after* statechange, so as not to affect
   1341        * buffered vertices reliant on the old state:
   1342        */
   1343       intel->vertex_size = _tnl_install_attrs(&intel->ctx,
   1344                                               intel->vertex_attrs,
   1345                                               intel->vertex_attr_count,
   1346                                               intel->ViewportMatrix.m, 0);
   1347 
   1348       assert(intel->prim.current_offset == intel->prim.start_offset);
   1349       intel->prim.start_offset = (intel->prim.current_offset + intel->vertex_size-1) / intel->vertex_size * intel->vertex_size;
   1350       intel->prim.current_offset = intel->prim.start_offset;
   1351 
   1352       intel->vertex_size >>= 2;
   1353 
   1354       i915->state.Ctx[I915_CTXREG_LIS2] = s2;
   1355       i915->state.Ctx[I915_CTXREG_LIS4] = s4;
   1356 
   1357       assert(intel->vtbl.check_vertex_size(intel, intel->vertex_size));
   1358    }
   1359 
   1360    if (!p->params_uptodate)
   1361       track_params(p);
   1362 
   1363    if (!p->on_hardware)
   1364       i915_upload_program(i915, p);
   1365 
   1366    if (INTEL_DEBUG & DEBUG_WM) {
   1367       printf("i915:\n");
   1368       i915_disassemble_program(i915->state.Program, i915->state.ProgramSize);
   1369    }
   1370 }
   1371 
   1372 void
   1373 i915InitFragProgFuncs(struct dd_function_table *functions)
   1374 {
   1375    functions->BindProgram = i915BindProgram;
   1376    functions->NewProgram = i915NewProgram;
   1377    functions->DeleteProgram = i915DeleteProgram;
   1378    functions->IsProgramNative = i915IsProgramNative;
   1379    functions->ProgramStringNotify = i915ProgramStringNotify;
   1380    functions->SamplerUniformChange = i915SamplerUniformChange;
   1381 }
   1382