Home | History | Annotate | Download | only in i915
      1 /**************************************************************************
      2  *
      3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 
     29 #include <stdarg.h>
     30 
     31 #include "i915_reg.h"
     32 #include "i915_context.h"
     33 #include "i915_fpc.h"
     34 
     35 #include "pipe/p_shader_tokens.h"
     36 #include "util/u_math.h"
     37 #include "util/u_memory.h"
     38 #include "util/u_string.h"
     39 #include "tgsi/tgsi_parse.h"
     40 #include "tgsi/tgsi_dump.h"
     41 
     42 #include "draw/draw_vertex.h"
     43 
     44 #ifndef M_PI
     45 #define M_PI 3.14159265358979323846
     46 #endif
     47 
/**
 * Simple pass-through fragment shader to use when we don't have
 * a real shader (or it fails to compile for some reason).
 *
 * This array is the declaration half of that shader; it is copied into
 * fs->decl by i915_use_passthrough_shader().
 */
static unsigned passthrough_decl[] =
{
   /* Packet header.
    * NOTE(review): the (2*3)-1 length presumably counts two 3-dword
    * instructions (this declaration plus the MOV in passthrough_program)
    * minus one -- confirm against the hardware documentation.
    */
   _3DSTATE_PIXEL_SHADER_PROGRAM | ((2*3)-1),

   /* declare input color: a T-type register, number T_DIFFUSE,
    * with all channels enabled.
    */
   (D0_DCL |
    (REG_TYPE_T << D0_TYPE_SHIFT) |
    (T_DIFFUSE << D0_NR_SHIFT) |
    D0_CHANNEL_ALL),
   0,
   0,
};
     65 
/**
 * Instruction half of the pass-through fragment shader: one MOV of the
 * diffuse input color to the output color register.  Copied into
 * fs->program by i915_use_passthrough_shader().
 */
static unsigned passthrough_program[] =
{
   /* move to output color:
    * destination is the OC register (all channels), source 0 is the
    * T-type register numbered T_DIFFUSE.
    */
   (A0_MOV |
    (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) |
    A0_DEST_CHANNEL_ALL |
    (REG_TYPE_T << A0_SRC0_TYPE_SHIFT) |
    (T_DIFFUSE << A0_SRC0_NR_SHIFT)),
   0x01230000,			/* .xyzw */
   0
};
     78 
     79 
     80 /* 1, -1/3!, 1/5!, -1/7! */
     81 static const float scs_sin_constants[4] = { 1.0,
     82    -1.0f / (3 * 2 * 1),
     83    1.0f / (5 * 4 * 3 * 2 * 1),
     84    -1.0f / (7 * 6 * 5 * 4 * 3 * 2 * 1)
     85 };
     86 
     87 /* 1, -1/2!, 1/4!, -1/6! */
     88 static const float scs_cos_constants[4] = { 1.0,
     89    -1.0f / (2 * 1),
     90    1.0f / (4 * 3 * 2 * 1),
     91    -1.0f / (6 * 5 * 4 * 3 * 2 * 1)
     92 };
     93 
/*
 * Taylor-series coefficients of sin(2*pi*x) in odd powers of x
 * (x, x^3, x^5, x^7):
 *
 *    2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7!
 *
 * The (2*pi)^k factor is folded into each coefficient, so the polynomial
 * is evaluated on an argument expressed in turns rather than radians.
 * NOTE(review): the consumer is not visible in this part of the file;
 * presumably the SIN opcode translation -- confirm.
 */
static const float sin_constants[4] = { 2.0 * M_PI,
   -8.0f * M_PI * M_PI * M_PI / (3 * 2 * 1),
   32.0f * M_PI * M_PI * M_PI * M_PI * M_PI / (5 * 4 * 3 * 2 * 1),
   -128.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (7 * 6 * 5 * 4 * 3 * 2 * 1)
};
    100 
/*
 * Taylor-series coefficients of cos(2*pi*x) in even powers of x
 * (1, x^2, x^4, x^6):
 *
 *    1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6!
 *
 * The COS translation first scales its operand by 1/(2*pi) and applies
 * A0_MOD, then takes a DP4 of the resulting powers with this vector;
 * the (2*pi)^k factors here undo that scaling.
 */
static const float cos_constants[4] = { 1.0,
   -4.0f * M_PI * M_PI / (2 * 1),
   16.0f * M_PI * M_PI * M_PI * M_PI / (4 * 3 * 2 * 1),
   -64.0f * M_PI * M_PI * M_PI * M_PI * M_PI * M_PI / (6 * 5 * 4 * 3 * 2 * 1)
};
    107 
    108 
    109 
    110 /**
    111  * component-wise negation of ureg
    112  */
    113 static INLINE int
    114 negate(int reg, int x, int y, int z, int w)
    115 {
    116    /* Another neat thing about the UREG representation */
    117    return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
    118                  ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
    119                  ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
    120                  ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
    121 }
    122 
    123 
    124 /**
    125  * In the event of a translation failure, we'll generate a simple color
    126  * pass-through program.
    127  */
    128 static void
    129 i915_use_passthrough_shader(struct i915_fragment_shader *fs)
    130 {
    131    fs->program = (uint *) MALLOC(sizeof(passthrough_program));
    132    fs->decl = (uint *) MALLOC(sizeof(passthrough_decl));
    133    if (fs->program) {
    134       memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
    135       memcpy(fs->decl, passthrough_decl, sizeof(passthrough_decl));
    136       fs->program_len = Elements(passthrough_program);
    137       fs->decl_len = Elements(passthrough_decl);
    138    }
    139    fs->num_constants = 0;
    140 }
    141 
    142 
    143 void
    144 i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
    145 {
    146    va_list args;
    147    char buffer[1024];
    148 
    149    debug_printf("i915_program_error: ");
    150    va_start( args, msg );
    151    util_vsnprintf( buffer, sizeof(buffer), msg, args );
    152    va_end( args );
    153    debug_printf("%s", buffer);
    154    debug_printf("\n");
    155 
    156    p->error = 1;
    157 }
    158 
    159 static uint get_mapping(struct i915_fragment_shader* fs, int unit)
    160 {
    161    int i;
    162    for (i = 0; i < I915_TEX_UNITS; i++)
    163    {
    164       if (fs->generic_mapping[i] == -1) {
    165          fs->generic_mapping[i] = unit;
    166          return i;
    167       }
    168       if (fs->generic_mapping[i] == unit)
    169          return i;
    170    }
    171    debug_printf("Exceeded max generics\n");
    172    return 0;
    173 }
    174 
    175 /**
    176  * Construct a ureg for the given source register.  Will emit
    177  * constants, apply swizzling and negation as needed.
    178  */
    179 static uint
    180 src_vector(struct i915_fp_compile *p,
    181            const struct i915_full_src_register *source,
    182            struct i915_fragment_shader* fs)
    183 {
    184    uint index = source->Register.Index;
    185    uint src = 0, sem_name, sem_ind;
    186 
    187    switch (source->Register.File) {
    188    case TGSI_FILE_TEMPORARY:
    189       if (source->Register.Index >= I915_MAX_TEMPORARY) {
    190          i915_program_error(p, "Exceeded max temporary reg");
    191          return 0;
    192       }
    193       src = UREG(REG_TYPE_R, index);
    194       break;
    195    case TGSI_FILE_INPUT:
    196       /* XXX: Packing COL1, FOGC into a single attribute works for
    197        * texenv programs, but will fail for real fragment programs
    198        * that use these attributes and expect them to be a full 4
    199        * components wide.  Could use a texcoord to pass these
    200        * attributes if necessary, but that won't work in the general
    201        * case.
    202        *
    203        * We also use a texture coordinate to pass wpos when possible.
    204        */
    205 
    206       sem_name = p->shader->info.input_semantic_name[index];
    207       sem_ind = p->shader->info.input_semantic_index[index];
    208 
    209       switch (sem_name) {
    210       case TGSI_SEMANTIC_POSITION:
    211          {
    212             /* for fragcoord */
    213             int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS);
    214             src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
    215             break;
    216          }
    217       case TGSI_SEMANTIC_COLOR:
    218          if (sem_ind == 0) {
    219             src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
    220          }
    221          else {
    222             /* secondary color */
    223             assert(sem_ind == 1);
    224             src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
    225             src = swizzle(src, X, Y, Z, ONE);
    226          }
    227          break;
    228       case TGSI_SEMANTIC_FOG:
    229          src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
    230          src = swizzle(src, W, W, W, W);
    231          break;
    232       case TGSI_SEMANTIC_GENERIC:
    233          {
    234             int real_tex_unit = get_mapping(fs, sem_ind);
    235             src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_ALL);
    236             break;
    237          }
    238       case TGSI_SEMANTIC_FACE:
    239          {
    240             /* for back/front faces */
    241             int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE);
    242             src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
    243             break;
    244          }
    245       default:
    246          i915_program_error(p, "Bad source->Index");
    247          return 0;
    248       }
    249       break;
    250 
    251    case TGSI_FILE_IMMEDIATE:
    252       assert(index < p->num_immediates);
    253       index = p->immediates_map[index];
    254       /* fall-through */
    255    case TGSI_FILE_CONSTANT:
    256       src = UREG(REG_TYPE_CONST, index);
    257       break;
    258 
    259    default:
    260       i915_program_error(p, "Bad source->File");
    261       return 0;
    262    }
    263 
    264    src = swizzle(src,
    265 		 source->Register.SwizzleX,
    266 		 source->Register.SwizzleY,
    267 		 source->Register.SwizzleZ,
    268 		 source->Register.SwizzleW);
    269 
    270    /* There's both negate-all-components and per-component negation.
    271     * Try to handle both here.
    272     */
    273    {
    274       int n = source->Register.Negate;
    275       src = negate(src, n, n, n, n);
    276    }
    277 
    278    /* no abs() */
    279 #if 0
    280    /* XXX assertions disabled to allow arbfplight.c to run */
    281    /* XXX enable these assertions, or fix things */
    282    assert(!source->Register.Absolute);
    283 #endif
    284    if (source->Register.Absolute)
    285       debug_printf("Unhandled absolute value\n");
    286 
    287    return src;
    288 }
    289 
    290 
    291 /**
    292  * Construct a ureg for a destination register.
    293  */
    294 static uint
    295 get_result_vector(struct i915_fp_compile *p,
    296                   const struct i915_full_dst_register *dest)
    297 {
    298    switch (dest->Register.File) {
    299    case TGSI_FILE_OUTPUT:
    300       {
    301          uint sem_name = p->shader->info.output_semantic_name[dest->Register.Index];
    302          switch (sem_name) {
    303          case TGSI_SEMANTIC_POSITION:
    304             return UREG(REG_TYPE_OD, 0);
    305          case TGSI_SEMANTIC_COLOR:
    306             return UREG(REG_TYPE_OC, 0);
    307          default:
    308             i915_program_error(p, "Bad inst->DstReg.Index/semantics");
    309             return 0;
    310          }
    311       }
    312    case TGSI_FILE_TEMPORARY:
    313       return UREG(REG_TYPE_R, dest->Register.Index);
    314    default:
    315       i915_program_error(p, "Bad inst->DstReg.File");
    316       return 0;
    317    }
    318 }
    319 
    320 
    321 /**
    322  * Compute flags for saturation and writemask.
    323  */
    324 static uint
    325 get_result_flags(const struct i915_full_instruction *inst)
    326 {
    327    const uint writeMask
    328       = inst->Dst[0].Register.WriteMask;
    329    uint flags = 0x0;
    330 
    331    if (inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE)
    332       flags |= A0_DEST_SATURATE;
    333 
    334    if (writeMask & TGSI_WRITEMASK_X)
    335       flags |= A0_DEST_CHANNEL_X;
    336    if (writeMask & TGSI_WRITEMASK_Y)
    337       flags |= A0_DEST_CHANNEL_Y;
    338    if (writeMask & TGSI_WRITEMASK_Z)
    339       flags |= A0_DEST_CHANNEL_Z;
    340    if (writeMask & TGSI_WRITEMASK_W)
    341       flags |= A0_DEST_CHANNEL_W;
    342 
    343    return flags;
    344 }
    345 
    346 
    347 /**
    348  * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
    349  */
    350 static uint
    351 translate_tex_src_target(struct i915_fp_compile *p, uint tex)
    352 {
    353    switch (tex) {
    354    case TGSI_TEXTURE_SHADOW1D:
    355       /* fall-through */
    356    case TGSI_TEXTURE_1D:
    357       return D0_SAMPLE_TYPE_2D;
    358 
    359    case TGSI_TEXTURE_SHADOW2D:
    360       /* fall-through */
    361    case TGSI_TEXTURE_2D:
    362       return D0_SAMPLE_TYPE_2D;
    363 
    364    case TGSI_TEXTURE_SHADOWRECT:
    365       /* fall-through */
    366    case TGSI_TEXTURE_RECT:
    367       return D0_SAMPLE_TYPE_2D;
    368 
    369    case TGSI_TEXTURE_3D:
    370       return D0_SAMPLE_TYPE_VOLUME;
    371 
    372    case TGSI_TEXTURE_CUBE:
    373       return D0_SAMPLE_TYPE_CUBE;
    374 
    375    default:
    376       i915_program_error(p, "TexSrc type");
    377       return 0;
    378    }
    379 }
    380 
    381 /**
    382  * Return the number of coords needed to access a given TGSI_TEXTURE_*
    383  */
    384 static uint
    385 texture_num_coords(struct i915_fp_compile *p, uint tex)
    386 {
    387    switch (tex) {
    388    case TGSI_TEXTURE_SHADOW1D:
    389    case TGSI_TEXTURE_1D:
    390       return 1;
    391 
    392    case TGSI_TEXTURE_SHADOW2D:
    393    case TGSI_TEXTURE_2D:
    394    case TGSI_TEXTURE_SHADOWRECT:
    395    case TGSI_TEXTURE_RECT:
    396       return 2;
    397 
    398    case TGSI_TEXTURE_3D:
    399    case TGSI_TEXTURE_CUBE:
    400       return 3;
    401 
    402    default:
    403       i915_program_error(p, "Num coords");
    404       return 2;
    405    }
    406 }
    407 
    408 
    409 /**
    410  * Generate texel lookup instruction.
    411  */
    412 static void
    413 emit_tex(struct i915_fp_compile *p,
    414          const struct i915_full_instruction *inst,
    415          uint opcode,
    416          struct i915_fragment_shader* fs)
    417 {
    418    uint texture = inst->Texture.Texture;
    419    uint unit = inst->Src[1].Register.Index;
    420    uint tex = translate_tex_src_target( p, texture );
    421    uint sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
    422    uint coord = src_vector( p, &inst->Src[0], fs);
    423 
    424    i915_emit_texld( p,
    425                     get_result_vector( p, &inst->Dst[0] ),
    426                     get_result_flags( inst ),
    427                     sampler,
    428                     coord,
    429                     opcode,
    430                     texture_num_coords(p, texture) );
    431 }
    432 
    433 
    434 /**
    435  * Generate a simple arithmetic instruction
    436  * \param opcode  the i915 opcode
    437  * \param numArgs  the number of input/src arguments
    438  */
    439 static void
    440 emit_simple_arith(struct i915_fp_compile *p,
    441                   const struct i915_full_instruction *inst,
    442                   uint opcode, uint numArgs,
    443                   struct i915_fragment_shader* fs)
    444 {
    445    uint arg1, arg2, arg3;
    446 
    447    assert(numArgs <= 3);
    448 
    449    arg1 = (numArgs < 1) ? 0 : src_vector( p, &inst->Src[0], fs );
    450    arg2 = (numArgs < 2) ? 0 : src_vector( p, &inst->Src[1], fs );
    451    arg3 = (numArgs < 3) ? 0 : src_vector( p, &inst->Src[2], fs );
    452 
    453    i915_emit_arith( p,
    454                     opcode,
    455                     get_result_vector( p, &inst->Dst[0]),
    456                     get_result_flags( inst ), 0,
    457                     arg1,
    458                     arg2,
    459                     arg3 );
    460 }
    461 
    462 
    463 /** As above, but swap the first two src regs */
    464 static void
    465 emit_simple_arith_swap2(struct i915_fp_compile *p,
    466                         const struct i915_full_instruction *inst,
    467                         uint opcode, uint numArgs,
    468                         struct i915_fragment_shader* fs)
    469 {
    470    struct i915_full_instruction inst2;
    471 
    472    assert(numArgs == 2);
    473 
    474    /* transpose first two registers */
    475    inst2 = *inst;
    476    inst2.Src[0] = inst->Src[1];
    477    inst2.Src[1] = inst->Src[0];
    478 
    479    emit_simple_arith(p, &inst2, opcode, numArgs, fs);
    480 }
    481 
    482 /*
    483  * Translate TGSI instruction to i915 instruction.
    484  *
    485  * Possible concerns:
    486  *
    487  * DDX, DDY -- return 0
    488  * SIN, COS -- could use another taylor step?
    489  * LIT      -- results seem a little different to sw mesa
    490  * LOG      -- different to mesa on negative numbers, but this is conformant.
    491  */
    492 static void
    493 i915_translate_instruction(struct i915_fp_compile *p,
    494                            const struct i915_full_instruction *inst,
    495                            struct i915_fragment_shader *fs)
    496 {
    497    uint writemask;
    498    uint src0, src1, src2, flags;
    499    uint tmp = 0;
    500 
    501    switch (inst->Instruction.Opcode) {
    502    case TGSI_OPCODE_ABS:
    503       src0 = src_vector(p, &inst->Src[0], fs);
    504       i915_emit_arith(p,
    505                       A0_MAX,
    506                       get_result_vector(p, &inst->Dst[0]),
    507                       get_result_flags(inst), 0,
    508                       src0, negate(src0, 1, 1, 1, 1), 0);
    509       break;
    510 
    511    case TGSI_OPCODE_ADD:
    512       emit_simple_arith(p, inst, A0_ADD, 2, fs);
    513       break;
    514 
    515    case TGSI_OPCODE_CEIL:
    516       src0 = src_vector(p, &inst->Src[0], fs);
    517       tmp = i915_get_utemp(p);
    518       flags = get_result_flags(inst);
    519       i915_emit_arith(p,
    520                       A0_FLR,
    521                       tmp,
    522                       flags & A0_DEST_CHANNEL_ALL, 0,
    523                       negate(src0, 1, 1, 1, 1), 0, 0);
    524       i915_emit_arith(p,
    525                       A0_MOV,
    526                       get_result_vector(p, &inst->Dst[0]),
    527                       flags, 0,
    528                       negate(tmp, 1, 1, 1, 1), 0, 0);
    529       break;
    530 
    531    case TGSI_OPCODE_CMP:
    532       src0 = src_vector(p, &inst->Src[0], fs);
    533       src1 = src_vector(p, &inst->Src[1], fs);
    534       src2 = src_vector(p, &inst->Src[2], fs);
    535       i915_emit_arith(p, A0_CMP,
    536                       get_result_vector(p, &inst->Dst[0]),
    537                       get_result_flags(inst),
    538                       0, src0, src2, src1);   /* NOTE: order of src2, src1 */
    539       break;
    540 
    541    case TGSI_OPCODE_COS:
    542       src0 = src_vector(p, &inst->Src[0], fs);
    543       tmp = i915_get_utemp(p);
    544 
    545       i915_emit_arith(p,
    546                       A0_MUL,
    547                       tmp, A0_DEST_CHANNEL_X, 0,
    548                       src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
    549 
    550       i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
    551 
    552       /*
    553        * t0.xy = MUL x.xx11, x.x111  ; x^2, x, 1, 1
    554        * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
    555        * t0 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
    556        * result = DP4 t0, cos_constants
    557        */
    558       i915_emit_arith(p,
    559                       A0_MUL,
    560                       tmp, A0_DEST_CHANNEL_XY, 0,
    561                       swizzle(tmp, X, X, ONE, ONE),
    562                       swizzle(tmp, X, ONE, ONE, ONE), 0);
    563 
    564       i915_emit_arith(p,
    565                       A0_MUL,
    566                       tmp, A0_DEST_CHANNEL_XYZ, 0,
    567                       swizzle(tmp, X, Y, X, ONE),
    568                       swizzle(tmp, X, X, ONE, ONE), 0);
    569 
    570       i915_emit_arith(p,
    571                       A0_MUL,
    572                       tmp, A0_DEST_CHANNEL_XYZ, 0,
    573                       swizzle(tmp, X, X, Z, ONE),
    574                       swizzle(tmp, Z, ONE, ONE, ONE), 0);
    575 
    576       i915_emit_arith(p,
    577                       A0_DP4,
    578                       get_result_vector(p, &inst->Dst[0]),
    579                       get_result_flags(inst), 0,
    580                       swizzle(tmp, ONE, Z, Y, X),
    581                       i915_emit_const4fv(p, cos_constants), 0);
    582       break;
    583 
    584   case TGSI_OPCODE_DDX:
    585   case TGSI_OPCODE_DDY:
    586       /* XXX We just output 0 here */
    587       debug_printf("Punting DDX/DDX\n");
    588       src0 = get_result_vector(p, &inst->Dst[0]);
    589       i915_emit_arith(p,
    590                       A0_MOV,
    591                       get_result_vector(p, &inst->Dst[0]),
    592                       get_result_flags(inst), 0,
    593                       swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
    594       break;
    595 
    596   case TGSI_OPCODE_DP2:
    597       src0 = src_vector(p, &inst->Src[0], fs);
    598       src1 = src_vector(p, &inst->Src[1], fs);
    599 
    600       i915_emit_arith(p,
    601                       A0_DP3,
    602                       get_result_vector(p, &inst->Dst[0]),
    603                       get_result_flags(inst), 0,
    604                       swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
    605       break;
    606 
    607    case TGSI_OPCODE_DP3:
    608       emit_simple_arith(p, inst, A0_DP3, 2, fs);
    609       break;
    610 
    611    case TGSI_OPCODE_DP4:
    612       emit_simple_arith(p, inst, A0_DP4, 2, fs);
    613       break;
    614 
    615    case TGSI_OPCODE_DPH:
    616       src0 = src_vector(p, &inst->Src[0], fs);
    617       src1 = src_vector(p, &inst->Src[1], fs);
    618 
    619       i915_emit_arith(p,
    620                       A0_DP4,
    621                       get_result_vector(p, &inst->Dst[0]),
    622                       get_result_flags(inst), 0,
    623                       swizzle(src0, X, Y, Z, ONE), src1, 0);
    624       break;
    625 
    626    case TGSI_OPCODE_DST:
    627       src0 = src_vector(p, &inst->Src[0], fs);
    628       src1 = src_vector(p, &inst->Src[1], fs);
    629 
    630       /* result[0] = 1    * 1;
    631        * result[1] = a[1] * b[1];
    632        * result[2] = a[2] * 1;
    633        * result[3] = 1    * b[3];
    634        */
    635       i915_emit_arith(p,
    636                       A0_MUL,
    637                       get_result_vector(p, &inst->Dst[0]),
    638                       get_result_flags(inst), 0,
    639                       swizzle(src0, ONE, Y, Z, ONE),
    640                       swizzle(src1, ONE, Y, ONE, W), 0);
    641       break;
    642 
    643    case TGSI_OPCODE_END:
    644       /* no-op */
    645       break;
    646 
    647    case TGSI_OPCODE_EX2:
    648       src0 = src_vector(p, &inst->Src[0], fs);
    649 
    650       i915_emit_arith(p,
    651                       A0_EXP,
    652                       get_result_vector(p, &inst->Dst[0]),
    653                       get_result_flags(inst), 0,
    654                       swizzle(src0, X, X, X, X), 0, 0);
    655       break;
    656 
    657    case TGSI_OPCODE_FLR:
    658       emit_simple_arith(p, inst, A0_FLR, 1, fs);
    659       break;
    660 
    661    case TGSI_OPCODE_FRC:
    662       emit_simple_arith(p, inst, A0_FRC, 1, fs);
    663       break;
    664 
    665    case TGSI_OPCODE_KIL:
    666       /* kill if src[0].x < 0 || src[0].y < 0 ... */
    667       src0 = src_vector(p, &inst->Src[0], fs);
    668       tmp = i915_get_utemp(p);
    669 
    670       i915_emit_texld(p,
    671                       tmp,                   /* dest reg: a dummy reg */
    672                       A0_DEST_CHANNEL_ALL,   /* dest writemask */
    673                       0,                     /* sampler */
    674                       src0,                  /* coord*/
    675                       T0_TEXKILL,            /* opcode */
    676                       1);                    /* num_coord */
    677       break;
    678 
    679    case TGSI_OPCODE_KILP:
    680       /* We emit an unconditional kill; we may want to revisit
    681        * if we ever implement conditionals.
    682        */
    683       tmp = i915_get_utemp(p);
    684 
    685       i915_emit_texld(p,
    686                       tmp,                                   /* dest reg: a dummy reg */
    687                       A0_DEST_CHANNEL_ALL,                   /* dest writemask */
    688                       0,                                     /* sampler */
    689                       negate(swizzle(0, ONE, ONE, ONE, ONE), 1, 1, 1, 1), /* coord */
    690                       T0_TEXKILL,                            /* opcode */
    691                       1);                                    /* num_coord */
    692       break;
    693 
    694    case TGSI_OPCODE_LG2:
    695       src0 = src_vector(p, &inst->Src[0], fs);
    696 
    697       i915_emit_arith(p,
    698                       A0_LOG,
    699                       get_result_vector(p, &inst->Dst[0]),
    700                       get_result_flags(inst), 0,
    701                       swizzle(src0, X, X, X, X), 0, 0);
    702       break;
    703 
    704    case TGSI_OPCODE_LIT:
    705       src0 = src_vector(p, &inst->Src[0], fs);
    706       tmp = i915_get_utemp(p);
    707 
    708       /* tmp = max( a.xyzw, a.00zw )
    709        * XXX: Clamp tmp.w to -128..128
    710        * tmp.y = log(tmp.y)
    711        * tmp.y = tmp.w * tmp.y
    712        * tmp.y = exp(tmp.y)
    713        * result = cmp (a.11-x1, a.1x01, a.1xy1 )
    714        */
    715       i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0,
    716                       src0, swizzle(src0, ZERO, ZERO, Z, W), 0);
    717 
    718       i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
    719                       swizzle(tmp, Y, Y, Y, Y), 0, 0);
    720 
    721       i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
    722                       swizzle(tmp, ZERO, Y, ZERO, ZERO),
    723                       swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
    724 
    725       i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
    726                       swizzle(tmp, Y, Y, Y, Y), 0, 0);
    727 
    728       i915_emit_arith(p, A0_CMP,
    729                       get_result_vector(p, &inst->Dst[0]),
    730                       get_result_flags(inst), 0,
    731                       negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
    732                       swizzle(tmp, ONE, X, ZERO, ONE),
    733                       swizzle(tmp, ONE, X, Y, ONE));
    734 
    735       break;
    736 
    737    case TGSI_OPCODE_LRP:
    738       src0 = src_vector(p, &inst->Src[0], fs);
    739       src1 = src_vector(p, &inst->Src[1], fs);
    740       src2 = src_vector(p, &inst->Src[2], fs);
    741       flags = get_result_flags(inst);
    742       tmp = i915_get_utemp(p);
    743 
    744       /* b*a + c*(1-a)
    745        *
    746        * b*a + c - ca
    747        *
    748        * tmp = b*a + c,
    749        * result = (-c)*a + tmp
    750        */
    751       i915_emit_arith(p, A0_MAD, tmp,
    752                       flags & A0_DEST_CHANNEL_ALL, 0, src1, src0, src2);
    753 
    754       i915_emit_arith(p, A0_MAD,
    755                       get_result_vector(p, &inst->Dst[0]),
    756                       flags, 0, negate(src2, 1, 1, 1, 1), src0, tmp);
    757       break;
    758 
    759    case TGSI_OPCODE_MAD:
    760       emit_simple_arith(p, inst, A0_MAD, 3, fs);
    761       break;
    762 
    763    case TGSI_OPCODE_MAX:
    764       emit_simple_arith(p, inst, A0_MAX, 2, fs);
    765       break;
    766 
    767    case TGSI_OPCODE_MIN:
    768       src0 = src_vector(p, &inst->Src[0], fs);
    769       src1 = src_vector(p, &inst->Src[1], fs);
    770       tmp = i915_get_utemp(p);
    771       flags = get_result_flags(inst);
    772 
    773       i915_emit_arith(p,
    774                       A0_MAX,
    775                       tmp, flags & A0_DEST_CHANNEL_ALL, 0,
    776                       negate(src0, 1, 1, 1, 1),
    777                       negate(src1, 1, 1, 1, 1), 0);
    778 
    779       i915_emit_arith(p,
    780                       A0_MOV,
    781                       get_result_vector(p, &inst->Dst[0]),
    782                       flags, 0, negate(tmp, 1, 1, 1, 1), 0, 0);
    783       break;
    784 
    785    case TGSI_OPCODE_MOV:
    786       emit_simple_arith(p, inst, A0_MOV, 1, fs);
    787       break;
    788 
    789    case TGSI_OPCODE_MUL:
    790       emit_simple_arith(p, inst, A0_MUL, 2, fs);
    791       break;
    792 
    793    case TGSI_OPCODE_NOP:
    794       break;
    795 
    796    case TGSI_OPCODE_POW:
    797       src0 = src_vector(p, &inst->Src[0], fs);
    798       src1 = src_vector(p, &inst->Src[1], fs);
    799       tmp = i915_get_utemp(p);
    800       flags = get_result_flags(inst);
    801 
    802       /* XXX: masking on intermediate values, here and elsewhere.
    803        */
    804       i915_emit_arith(p,
    805                       A0_LOG,
    806                       tmp, A0_DEST_CHANNEL_X, 0,
    807                       swizzle(src0, X, X, X, X), 0, 0);
    808 
    809       i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
    810 
    811       i915_emit_arith(p,
    812                       A0_EXP,
    813                       get_result_vector(p, &inst->Dst[0]),
    814                       flags, 0, swizzle(tmp, X, X, X, X), 0, 0);
    815       break;
    816 
    817    case TGSI_OPCODE_RET:
    818       /* XXX: no-op? */
    819       break;
    820 
    821    case TGSI_OPCODE_RCP:
    822       src0 = src_vector(p, &inst->Src[0], fs);
    823 
    824       i915_emit_arith(p,
    825                       A0_RCP,
    826                       get_result_vector(p, &inst->Dst[0]),
    827                       get_result_flags(inst), 0,
    828                       swizzle(src0, X, X, X, X), 0, 0);
    829       break;
    830 
    831    case TGSI_OPCODE_RSQ:
    832       src0 = src_vector(p, &inst->Src[0], fs);
    833 
    834       i915_emit_arith(p,
    835                       A0_RSQ,
    836                       get_result_vector(p, &inst->Dst[0]),
    837                       get_result_flags(inst), 0,
    838                       swizzle(src0, X, X, X, X), 0, 0);
    839       break;
    840 
    841    case TGSI_OPCODE_SCS:
    842       src0 = src_vector(p, &inst->Src[0], fs);
    843       tmp = i915_get_utemp(p);
    844 
    845       /*
    846        * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
    847        * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
    848        * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
    849        * scs.x = DP4 t1, scs_sin_constants
    850        * t1 = MUL t0.xxz1 t0.z111    ; x^6 x^4 x^2 1
    851        * scs.y = DP4 t1, scs_cos_constants
    852        */
    853       i915_emit_arith(p,
    854                       A0_MUL,
    855                       tmp, A0_DEST_CHANNEL_XY, 0,
    856                       swizzle(src0, X, X, ONE, ONE),
    857                       swizzle(src0, X, ONE, ONE, ONE), 0);
    858 
    859       i915_emit_arith(p,
    860                       A0_MUL,
    861                       tmp, A0_DEST_CHANNEL_ALL, 0,
    862                       swizzle(tmp, X, Y, X, Y),
    863                       swizzle(tmp, X, X, ONE, ONE), 0);
    864 
    865       writemask = inst->Dst[0].Register.WriteMask;
    866 
    867       if (writemask & TGSI_WRITEMASK_Y) {
    868          uint tmp1;
    869 
    870          if (writemask & TGSI_WRITEMASK_X)
    871             tmp1 = i915_get_utemp(p);
    872          else
    873             tmp1 = tmp;
    874 
    875          i915_emit_arith(p,
    876                          A0_MUL,
    877                          tmp1, A0_DEST_CHANNEL_ALL, 0,
    878                          swizzle(tmp, X, Y, Y, W),
    879                          swizzle(tmp, X, Z, ONE, ONE), 0);
    880 
    881          i915_emit_arith(p,
    882                          A0_DP4,
    883                          get_result_vector(p, &inst->Dst[0]),
    884                          A0_DEST_CHANNEL_Y, 0,
    885                          swizzle(tmp1, W, Z, Y, X),
    886                          i915_emit_const4fv(p, scs_sin_constants), 0);
    887       }
    888 
    889       if (writemask & TGSI_WRITEMASK_X) {
    890          i915_emit_arith(p,
    891                          A0_MUL,
    892                          tmp, A0_DEST_CHANNEL_XYZ, 0,
    893                          swizzle(tmp, X, X, Z, ONE),
    894                          swizzle(tmp, Z, ONE, ONE, ONE), 0);
    895 
    896          i915_emit_arith(p,
    897                          A0_DP4,
    898                          get_result_vector(p, &inst->Dst[0]),
    899                          A0_DEST_CHANNEL_X, 0,
    900                          swizzle(tmp, ONE, Z, Y, X),
    901                          i915_emit_const4fv(p, scs_cos_constants), 0);
    902       }
    903       break;
    904 
    905    case TGSI_OPCODE_SEQ:
    906       /* if we're both >= and <= then we're == */
    907       src0 = src_vector(p, &inst->Src[0], fs);
    908       src1 = src_vector(p, &inst->Src[1], fs);
    909       tmp = i915_get_utemp(p);
    910 
    911       i915_emit_arith(p,
    912                       A0_SGE,
    913                       tmp, A0_DEST_CHANNEL_ALL, 0,
    914                       src0,
    915                       src1, 0);
    916 
    917       i915_emit_arith(p,
    918                       A0_SGE,
    919                       get_result_vector(p, &inst->Dst[0]),
    920                       A0_DEST_CHANNEL_ALL, 0,
    921                       src1,
    922                       src0, 0);
    923 
    924       i915_emit_arith(p,
    925                       A0_MUL,
    926                       get_result_vector(p, &inst->Dst[0]),
    927                       A0_DEST_CHANNEL_ALL, 0,
    928                       get_result_vector(p, &inst->Dst[0]),
    929                       tmp, 0);
    930 
    931       break;
    932 
    933    case TGSI_OPCODE_SGE:
    934       emit_simple_arith(p, inst, A0_SGE, 2, fs);
    935       break;
    936 
    937    case TGSI_OPCODE_SIN:
    938       src0 = src_vector(p, &inst->Src[0], fs);
    939       tmp = i915_get_utemp(p);
    940 
    941       i915_emit_arith(p,
    942                       A0_MUL,
    943                       tmp, A0_DEST_CHANNEL_X, 0,
    944                       src0, i915_emit_const1f(p, 1.0f / (float) (M_PI * 2.0)), 0);
    945 
    946       i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
    947 
    948       /*
    949        * t0.xy = MUL x.xx11, x.x1111  ; x^2, x, 1, 1
    950        * t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
    951        * t1 = MUL t0.xyyw t0.yz11    ; x^7 x^5 x^3 x
    952        * result = DP4 t1.wzyx, sin_constants
    953        */
    954       i915_emit_arith(p,
    955                       A0_MUL,
    956                       tmp, A0_DEST_CHANNEL_XY, 0,
    957                       swizzle(tmp, X, X, ONE, ONE),
    958                       swizzle(tmp, X, ONE, ONE, ONE), 0);
    959 
    960       i915_emit_arith(p,
    961                       A0_MUL,
    962                       tmp, A0_DEST_CHANNEL_ALL, 0,
    963                       swizzle(tmp, X, Y, X, Y),
    964                       swizzle(tmp, X, X, ONE, ONE), 0);
    965 
    966       i915_emit_arith(p,
    967                       A0_MUL,
    968                       tmp, A0_DEST_CHANNEL_ALL, 0,
    969                       swizzle(tmp, X, Y, Y, W),
    970                       swizzle(tmp, X, Z, ONE, ONE), 0);
    971 
    972       i915_emit_arith(p,
    973                       A0_DP4,
    974                       get_result_vector(p, &inst->Dst[0]),
    975                       get_result_flags(inst), 0,
    976                       swizzle(tmp, W, Z, Y, X),
    977                       i915_emit_const4fv(p, sin_constants), 0);
    978       break;
    979 
    980    case TGSI_OPCODE_SLE:
    981       /* like SGE, but swap reg0, reg1 */
    982       emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
    983       break;
    984 
    985    case TGSI_OPCODE_SLT:
    986       emit_simple_arith(p, inst, A0_SLT, 2, fs);
    987       break;
    988 
    989    case TGSI_OPCODE_SGT:
    990       /* like SLT, but swap reg0, reg1 */
    991       emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
    992       break;
    993 
    994    case TGSI_OPCODE_SNE:
    995       /* if we're < or > then we're != */
    996       src0 = src_vector(p, &inst->Src[0], fs);
    997       src1 = src_vector(p, &inst->Src[1], fs);
    998       tmp = i915_get_utemp(p);
    999 
   1000       i915_emit_arith(p,
   1001                       A0_SLT,
   1002                       tmp,
   1003                       A0_DEST_CHANNEL_ALL, 0,
   1004                       src0,
   1005                       src1, 0);
   1006 
   1007       i915_emit_arith(p,
   1008                       A0_SLT,
   1009                       get_result_vector(p, &inst->Dst[0]),
   1010                       A0_DEST_CHANNEL_ALL, 0,
   1011                       src1,
   1012                       src0, 0);
   1013 
   1014       i915_emit_arith(p,
   1015                       A0_ADD,
   1016                       get_result_vector(p, &inst->Dst[0]),
   1017                       A0_DEST_CHANNEL_ALL, 0,
   1018                       get_result_vector(p, &inst->Dst[0]),
   1019                       tmp, 0);
   1020       break;
   1021 
   1022    case TGSI_OPCODE_SSG:
   1023       /* compute (src>0) - (src<0) */
   1024       src0 = src_vector(p, &inst->Src[0], fs);
   1025       tmp = i915_get_utemp(p);
   1026 
   1027       i915_emit_arith(p,
   1028                       A0_SLT,
   1029                       tmp,
   1030                       A0_DEST_CHANNEL_ALL, 0,
   1031                       src0,
   1032                       swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);
   1033 
   1034       i915_emit_arith(p,
   1035                       A0_SLT,
   1036                       get_result_vector(p, &inst->Dst[0]),
   1037                       A0_DEST_CHANNEL_ALL, 0,
   1038                       swizzle(src0, ZERO, ZERO, ZERO, ZERO),
   1039                       src0, 0);
   1040 
   1041       i915_emit_arith(p,
   1042                       A0_ADD,
   1043                       get_result_vector(p, &inst->Dst[0]),
   1044                       A0_DEST_CHANNEL_ALL, 0,
   1045                       get_result_vector(p, &inst->Dst[0]),
   1046                       negate(tmp, 1, 1, 1, 1), 0);
   1047       break;
   1048 
   1049    case TGSI_OPCODE_SUB:
   1050       src0 = src_vector(p, &inst->Src[0], fs);
   1051       src1 = src_vector(p, &inst->Src[1], fs);
   1052 
   1053       i915_emit_arith(p,
   1054                       A0_ADD,
   1055                       get_result_vector(p, &inst->Dst[0]),
   1056                       get_result_flags(inst), 0,
   1057                       src0, negate(src1, 1, 1, 1, 1), 0);
   1058       break;
   1059 
   1060    case TGSI_OPCODE_TEX:
   1061       emit_tex(p, inst, T0_TEXLD, fs);
   1062       break;
   1063 
   1064    case TGSI_OPCODE_TRUNC:
   1065       emit_simple_arith(p, inst, A0_TRC, 1, fs);
   1066       break;
   1067 
   1068    case TGSI_OPCODE_TXB:
   1069       emit_tex(p, inst, T0_TEXLDB, fs);
   1070       break;
   1071 
   1072    case TGSI_OPCODE_TXP:
   1073       emit_tex(p, inst, T0_TEXLDP, fs);
   1074       break;
   1075 
   1076    case TGSI_OPCODE_XPD:
   1077       /* Cross product:
   1078        *      result.x = src0.y * src1.z - src0.z * src1.y;
   1079        *      result.y = src0.z * src1.x - src0.x * src1.z;
   1080        *      result.z = src0.x * src1.y - src0.y * src1.x;
   1081        *      result.w = undef;
   1082        */
   1083       src0 = src_vector(p, &inst->Src[0], fs);
   1084       src1 = src_vector(p, &inst->Src[1], fs);
   1085       tmp = i915_get_utemp(p);
   1086 
   1087       i915_emit_arith(p,
   1088                       A0_MUL,
   1089                       tmp, A0_DEST_CHANNEL_ALL, 0,
   1090                       swizzle(src0, Z, X, Y, ONE),
   1091                       swizzle(src1, Y, Z, X, ONE), 0);
   1092 
   1093       i915_emit_arith(p,
   1094                       A0_MAD,
   1095                       get_result_vector(p, &inst->Dst[0]),
   1096                       get_result_flags(inst), 0,
   1097                       swizzle(src0, Y, Z, X, ONE),
   1098                       swizzle(src1, Z, X, Y, ONE),
   1099                       negate(tmp, 1, 1, 1, 0));
   1100       break;
   1101 
   1102    default:
   1103       i915_program_error(p, "bad opcode %d", inst->Instruction.Opcode);
   1104       p->error = 1;
   1105       return;
   1106    }
   1107 
   1108    i915_release_utemps(p);
   1109 }
   1110 
   1111 
   1112 static void i915_translate_token(struct i915_fp_compile *p,
   1113                                  const union i915_full_token* token,
   1114                                  struct i915_fragment_shader *fs)
   1115 {
   1116    struct i915_fragment_shader *ifs = p->shader;
   1117    switch( token->Token.Type ) {
   1118    case TGSI_TOKEN_TYPE_PROPERTY:
   1119       /*
   1120        * We only support one cbuf, but we still need to ignore the property
   1121        * correctly so we don't hit the assert at the end of the switch case.
   1122        */
   1123       assert(token->FullProperty.Property.PropertyName ==
   1124              TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
   1125       break;
   1126 
   1127    case TGSI_TOKEN_TYPE_DECLARATION:
   1128       if (token->FullDeclaration.Declaration.File
   1129                == TGSI_FILE_CONSTANT) {
   1130          uint i;
   1131          for (i = token->FullDeclaration.Range.First;
   1132               i <= token->FullDeclaration.Range.Last;
   1133               i++) {
   1134             assert(ifs->constant_flags[i] == 0x0);
   1135             ifs->constant_flags[i] = I915_CONSTFLAG_USER;
   1136             ifs->num_constants = MAX2(ifs->num_constants, i + 1);
   1137          }
   1138       }
   1139       else if (token->FullDeclaration.Declaration.File
   1140                == TGSI_FILE_TEMPORARY) {
   1141          uint i;
   1142          for (i = token->FullDeclaration.Range.First;
   1143               i <= token->FullDeclaration.Range.Last;
   1144               i++) {
   1145             if (i >= I915_MAX_TEMPORARY)
   1146                debug_printf("Too many temps (%d)\n",i);
   1147             else
   1148                /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
   1149                p->temp_flag |= (1 << i); /* mark temp as used */
   1150          }
   1151       }
   1152       break;
   1153 
   1154    case TGSI_TOKEN_TYPE_IMMEDIATE:
   1155       {
   1156          const struct tgsi_full_immediate *imm
   1157             = &token->FullImmediate;
   1158          const uint pos = p->num_immediates++;
   1159          uint j;
   1160          assert( imm->Immediate.NrTokens <= 4 + 1 );
   1161          for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
   1162             p->immediates[pos][j] = imm->u[j].Float;
   1163          }
   1164       }
   1165       break;
   1166 
   1167    case TGSI_TOKEN_TYPE_INSTRUCTION:
   1168       if (p->first_instruction) {
   1169          /* resolve location of immediates */
   1170          uint i, j;
   1171          for (i = 0; i < p->num_immediates; i++) {
   1172             /* find constant slot for this immediate */
   1173             for (j = 0; j < I915_MAX_CONSTANT; j++) {
   1174                if (ifs->constant_flags[j] == 0x0) {
   1175                   memcpy(ifs->constants[j],
   1176                          p->immediates[i],
   1177                          4 * sizeof(float));
   1178                   /*printf("immediate %d maps to const %d\n", i, j);*/
   1179                   ifs->constant_flags[j] = 0xf;  /* all four comps used */
   1180                   p->immediates_map[i] = j;
   1181                   ifs->num_constants = MAX2(ifs->num_constants, j + 1);
   1182                   break;
   1183                }
   1184             }
   1185          }
   1186 
   1187          p->first_instruction = FALSE;
   1188       }
   1189 
   1190       i915_translate_instruction(p, &token->FullInstruction, fs);
   1191       break;
   1192 
   1193    default:
   1194       assert( 0 );
   1195    }
   1196 
   1197 }
   1198 
   1199 /**
   1200  * Translate TGSI fragment shader into i915 hardware instructions.
   1201  * \param p  the translation state
   1202  * \param tokens  the TGSI token array
   1203  */
   1204 static void
   1205 i915_translate_instructions(struct i915_fp_compile *p,
   1206                             const struct i915_token_list *tokens,
   1207                             struct i915_fragment_shader *fs)
   1208 {
   1209    int i;
   1210    for(i = 0; i<tokens->NumTokens; i++) {
   1211       i915_translate_token(p, &tokens->Tokens[i], fs);
   1212    }
   1213 }
   1214 
   1215 
   1216 static struct i915_fp_compile *
   1217 i915_init_compile(struct i915_context *i915,
   1218                   struct i915_fragment_shader *ifs)
   1219 {
   1220    struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
   1221    int i;
   1222 
   1223    p->shader = ifs;
   1224 
   1225    /* Put new constants at end of const buffer, growing downward.
   1226     * The problem is we don't know how many user-defined constants might
   1227     * be specified with pipe->set_constant_buffer().
   1228     * Should pre-scan the user's program to determine the highest-numbered
   1229     * constant referenced.
   1230     */
   1231    ifs->num_constants = 0;
   1232    memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
   1233 
   1234    memset(&p->register_phases, 0, sizeof(p->register_phases));
   1235 
   1236    for (i = 0; i < I915_TEX_UNITS; i++)
   1237       ifs->generic_mapping[i] = -1;
   1238 
   1239    p->first_instruction = TRUE;
   1240 
   1241    p->nr_tex_indirect = 1;      /* correct? */
   1242    p->nr_tex_insn = 0;
   1243    p->nr_alu_insn = 0;
   1244    p->nr_decl_insn = 0;
   1245 
   1246    p->csr = p->program;
   1247    p->decl = p->declarations;
   1248    p->decl_s = 0;
   1249    p->decl_t = 0;
   1250    p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
   1251    p->utemp_flag = ~0x7;
   1252 
   1253    /* initialize the first program word */
   1254    *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
   1255 
   1256    return p;
   1257 }
   1258 
   1259 
   1260 /* Copy compile results to the fragment program struct and destroy the
   1261  * compilation context.
   1262  */
   1263 static void
   1264 i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
   1265 {
   1266    struct i915_fragment_shader *ifs = p->shader;
   1267    unsigned long program_size = (unsigned long) (p->csr - p->program);
   1268    unsigned long decl_size = (unsigned long) (p->decl - p->declarations);
   1269 
   1270    if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
   1271       debug_printf("Exceeded max nr indirect texture lookups\n");
   1272 
   1273    if (p->nr_tex_insn > I915_MAX_TEX_INSN)
   1274       i915_program_error(p, "Exceeded max TEX instructions");
   1275 
   1276    if (p->nr_alu_insn > I915_MAX_ALU_INSN)
   1277       i915_program_error(p, "Exceeded max ALU instructions");
   1278 
   1279    if (p->nr_decl_insn > I915_MAX_DECL_INSN)
   1280       i915_program_error(p, "Exceeded max DECL instructions");
   1281 
   1282    if (p->error) {
   1283       p->NumNativeInstructions = 0;
   1284       p->NumNativeAluInstructions = 0;
   1285       p->NumNativeTexInstructions = 0;
   1286       p->NumNativeTexIndirections = 0;
   1287 
   1288       i915_use_passthrough_shader(ifs);
   1289    }
   1290    else {
   1291       p->NumNativeInstructions
   1292          = p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
   1293       p->NumNativeAluInstructions = p->nr_alu_insn;
   1294       p->NumNativeTexInstructions = p->nr_tex_insn;
   1295       p->NumNativeTexIndirections = p->nr_tex_indirect;
   1296 
   1297       /* patch in the program length */
   1298       p->declarations[0] |= program_size + decl_size - 2;
   1299 
   1300       /* Copy compilation results to fragment program struct:
   1301        */
   1302       assert(!ifs->decl);
   1303       assert(!ifs->program);
   1304 
   1305       ifs->decl
   1306          = (uint *) MALLOC(decl_size * sizeof(uint));
   1307       ifs->program
   1308          = (uint *) MALLOC(program_size * sizeof(uint));
   1309 
   1310       if (ifs->decl) {
   1311          ifs->decl_len = decl_size;
   1312 
   1313          memcpy(ifs->decl,
   1314                 p->declarations,
   1315                 decl_size * sizeof(uint));
   1316       }
   1317 
   1318       if (ifs->program) {
   1319          ifs->program_len = program_size;
   1320 
   1321          memcpy(ifs->program,
   1322                 p->program,
   1323                 program_size * sizeof(uint));
   1324       }
   1325    }
   1326 
   1327    /* Release the compilation struct:
   1328     */
   1329    FREE(p);
   1330 }
   1331 
   1332 
   1333 
   1334 
   1335 
   1336 /**
   1337  * Rather than trying to intercept and jiggle depth writes during
   1338  * emit, just move the value into its correct position at the end of
   1339  * the program:
   1340  */
   1341 static void
   1342 i915_fixup_depth_write(struct i915_fp_compile *p)
   1343 {
   1344    /* XXX assuming pos/depth is always in output[0] */
   1345    if (p->shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION) {
   1346       const uint depth = UREG(REG_TYPE_OD, 0);
   1347 
   1348       i915_emit_arith(p,
   1349                       A0_MOV,                     /* opcode */
   1350                       depth,                      /* dest reg */
   1351                       A0_DEST_CHANNEL_W,          /* write mask */
   1352                       0,                          /* saturate? */
   1353                       swizzle(depth, X, Y, Z, Z), /* src0 */
   1354                       0, 0 /* src1, src2 */);
   1355    }
   1356 }
   1357 
   1358 
   1359 void
   1360 i915_translate_fragment_program( struct i915_context *i915,
   1361                                  struct i915_fragment_shader *fs)
   1362 {
   1363    struct i915_fp_compile *p;
   1364    const struct tgsi_token *tokens = fs->state.tokens;
   1365    struct i915_token_list* i_tokens;
   1366 
   1367 #if 0
   1368    tgsi_dump(tokens, 0);
   1369 #endif
   1370 
   1371    /* hw doesn't seem to like empty frag programs, even when the depth write
   1372     * fixup gets emitted below - may that one is fishy, too? */
   1373    if (fs->info.num_instructions == 1) {
   1374       i915_use_passthrough_shader(fs);
   1375 
   1376       return;
   1377    }
   1378 
   1379    p = i915_init_compile(i915, fs);
   1380 
   1381    i_tokens = i915_optimize(tokens);
   1382    i915_translate_instructions(p, i_tokens, fs);
   1383    i915_fixup_depth_write(p);
   1384 
   1385    i915_fini_compile(i915, p);
   1386    i915_optimize_free(i_tokens);
   1387 
   1388 #if 0
   1389    i915_disassemble_program(NULL, fs->program, fs->program_len);
   1390 #endif
   1391 }
   1392