Home | History | Annotate | Download | only in i915
      1 /**************************************************************************
      2  *
      3  * Copyright 2011 The Chromium OS authors.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 #include "i915_reg.h"
     29 #include "i915_context.h"
     30 #include "i915_fpc.h"
     31 
     32 #include "pipe/p_shader_tokens.h"
     33 #include "util/u_math.h"
     34 #include "util/u_memory.h"
     35 #include "util/u_string.h"
     36 #include "tgsi/tgsi_parse.h"
     37 #include "tgsi/tgsi_dump.h"
     38 #include "tgsi/tgsi_exec.h"
     39 
     40 struct i915_optimize_context
     41 {
     42    int first_write[TGSI_EXEC_NUM_TEMPS];
     43    int last_read[TGSI_EXEC_NUM_TEMPS];
     44 };
     45 
     46 static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
     47 {
     48    return (s1->Register.File == d1->Register.File &&
     49            s1->Register.Indirect == d1->Register.Indirect &&
     50            s1->Register.Dimension == d1->Register.Dimension &&
     51            s1->Register.Index == d1->Register.Index);
     52 }
     53 
     54 static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
     55 {
     56    return (d1->Register.File == d2->Register.File &&
     57            d1->Register.Indirect == d2->Register.Indirect &&
     58            d1->Register.Dimension == d2->Register.Dimension &&
     59            d1->Register.Index == d2->Register.Index);
     60 }
     61 
     62 static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
     63 {
     64    return (d1->Register.File == d2->Register.File &&
     65            d1->Register.Indirect == d2->Register.Indirect &&
     66            d1->Register.Dimension == d2->Register.Dimension &&
     67            d1->Register.Index == d2->Register.Index &&
     68            d1->Register.Absolute == d2->Register.Absolute &&
     69            d1->Register.Negate == d2->Register.Negate);
     70 }
     71 
     72 static const struct {
     73    boolean is_texture;
     74    boolean commutes;
     75    unsigned neutral_element;
     76    unsigned num_dst;
     77    unsigned num_src;
     78 } op_table [TGSI_OPCODE_LAST] = {
     79    [ TGSI_OPCODE_ADD     ] = { false,   true,  TGSI_SWIZZLE_ZERO,  1,  2 },
     80    [ TGSI_OPCODE_CEIL    ] = { false,  false,                  0,  1,  1 },
     81    [ TGSI_OPCODE_CMP     ] = { false,  false,                  0,  1,  2 },
     82    [ TGSI_OPCODE_COS     ] = { false,  false,                  0,  1,  1 },
     83    [ TGSI_OPCODE_DDX     ] = { false,  false,                  0,  1,  0 },
     84    [ TGSI_OPCODE_DDY     ] = { false,  false,                  0,  1,  0 },
     85    [ TGSI_OPCODE_DP2     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
     86    [ TGSI_OPCODE_DP3     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
     87    [ TGSI_OPCODE_DP4     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
     88    [ TGSI_OPCODE_DST     ] = { false,  false,                  0,  1,  2 },
     89    [ TGSI_OPCODE_END     ] = { false,  false,                  0,  0,  0 },
     90    [ TGSI_OPCODE_EX2     ] = { false,  false,                  0,  1,  1 },
     91    [ TGSI_OPCODE_FLR     ] = { false,  false,                  0,  1,  1 },
     92    [ TGSI_OPCODE_FRC     ] = { false,  false,                  0,  1,  1 },
     93    [ TGSI_OPCODE_KILL_IF ] = { false,  false,                  0,  0,  1 },
     94    [ TGSI_OPCODE_KILL    ] = { false,  false,                  0,  0,  0 },
     95    [ TGSI_OPCODE_LG2     ] = { false,  false,                  0,  1,  1 },
     96    [ TGSI_OPCODE_LIT     ] = { false,  false,                  0,  1,  1 },
     97    [ TGSI_OPCODE_LRP     ] = { false,  false,                  0,  1,  3 },
     98    [ TGSI_OPCODE_MAX     ] = { false,  false,                  0,  1,  2 },
     99    [ TGSI_OPCODE_MAD     ] = { false,  false,                  0,  1,  3 },
    100    [ TGSI_OPCODE_MIN     ] = { false,  false,                  0,  1,  2 },
    101    [ TGSI_OPCODE_MOV     ] = { false,  false,                  0,  1,  1 },
    102    [ TGSI_OPCODE_MUL     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
    103    [ TGSI_OPCODE_NOP     ] = { false,  false,                  0,  0,  0 },
    104    [ TGSI_OPCODE_POW     ] = { false,  false,                  0,  1,  2 },
    105    [ TGSI_OPCODE_RCP     ] = { false,  false,                  0,  1,  1 },
    106    [ TGSI_OPCODE_RET     ] = { false,  false,                  0,  0,  0 },
    107    [ TGSI_OPCODE_RSQ     ] = { false,  false,                  0,  1,  1 },
    108    [ TGSI_OPCODE_SEQ     ] = { false,  false,                  0,  1,  2 },
    109    [ TGSI_OPCODE_SGE     ] = { false,  false,                  0,  1,  2 },
    110    [ TGSI_OPCODE_SGT     ] = { false,  false,                  0,  1,  2 },
    111    [ TGSI_OPCODE_SIN     ] = { false,  false,                  0,  1,  1 },
    112    [ TGSI_OPCODE_SLE     ] = { false,  false,                  0,  1,  2 },
    113    [ TGSI_OPCODE_SLT     ] = { false,  false,                  0,  1,  2 },
    114    [ TGSI_OPCODE_SNE     ] = { false,  false,                  0,  1,  2 },
    115    [ TGSI_OPCODE_SSG     ] = { false,  false,                  0,  1,  1 },
    116    [ TGSI_OPCODE_TEX     ] = {  true,  false,                  0,  1,  2 },
    117    [ TGSI_OPCODE_TRUNC   ] = { false,  false,                  0,  1,  1 },
    118    [ TGSI_OPCODE_TXB     ] = {  true,  false,                  0,  1,  2 },
    119    [ TGSI_OPCODE_TXP     ] = {  true,  false,                  0,  1,  2 },
    120 };
    121 
    122 static boolean op_has_dst(unsigned opcode)
    123 {
    124    return (op_table[opcode].num_dst > 0);
    125 }
    126 
    127 static int op_num_dst(unsigned opcode)
    128 {
    129    return op_table[opcode].num_dst;
    130 }
    131 
    132 static int op_num_src(unsigned opcode)
    133 {
    134    return op_table[opcode].num_src;
    135 }
    136 
    137 static boolean op_commutes(unsigned opcode)
    138 {
    139    return op_table[opcode].commutes;
    140 }
    141 
    142 static unsigned mask_for_unswizzled(int num_components)
    143 {
    144    unsigned mask = 0;
    145    switch(num_components)
    146    {
    147       case 4:
    148          mask |= TGSI_WRITEMASK_W;
    149       case 3:
    150          mask |= TGSI_WRITEMASK_Z;
    151       case 2:
    152          mask |= TGSI_WRITEMASK_Y;
    153       case 1:
    154          mask |= TGSI_WRITEMASK_X;
    155    }
    156    return mask;
    157 }
    158 
    159 static boolean is_unswizzled(struct i915_full_src_register *r,
    160                              unsigned write_mask)
    161 {
    162    if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
    163       return FALSE;
    164    if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
    165       return FALSE;
    166    if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
    167       return FALSE;
    168    if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
    169       return FALSE;
    170    return TRUE;
    171 }
    172 
    173 static boolean op_is_texture(unsigned opcode)
    174 {
    175    return op_table[opcode].is_texture;
    176 }
    177 
    178 static unsigned op_neutral_element(unsigned opcode)
    179 {
    180    unsigned ne = op_table[opcode].neutral_element;
    181    if (!ne) {
    182       debug_printf("No neutral element for opcode %d\n",opcode);
    183       ne = TGSI_SWIZZLE_ZERO;
    184    }
    185    return ne;
    186 }
    187 
    188 /*
    189  * Sets the swizzle to the neutral element for the operation for the bits
    190  * of writemask which are set, swizzle to identity otherwise.
    191  */
    192 static void set_neutral_element_swizzle(struct i915_full_src_register *r,
    193                                         unsigned write_mask,
    194                                         unsigned neutral)
    195 {
    196    if ( write_mask & TGSI_WRITEMASK_X )
    197       r->Register.SwizzleX = neutral;
    198    else
    199       r->Register.SwizzleX = TGSI_SWIZZLE_X;
    200 
    201    if ( write_mask & TGSI_WRITEMASK_Y )
    202       r->Register.SwizzleY = neutral;
    203    else
    204       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
    205 
    206    if ( write_mask & TGSI_WRITEMASK_Z )
    207       r->Register.SwizzleZ = neutral;
    208    else
    209       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
    210 
    211    if ( write_mask & TGSI_WRITEMASK_W )
    212       r->Register.SwizzleW = neutral;
    213    else
    214       r->Register.SwizzleW = TGSI_SWIZZLE_W;
    215 }
    216 
    217 static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
    218 {
    219    o->File      = i->File;
    220    o->Indirect  = i->Indirect;
    221    o->Dimension = i->Dimension;
    222    o->Index     = i->Index;
    223    o->SwizzleX  = i->SwizzleX;
    224    o->SwizzleY  = i->SwizzleY;
    225    o->SwizzleZ  = i->SwizzleZ;
    226    o->SwizzleW  = i->SwizzleW;
    227    o->Absolute  = i->Absolute;
    228    o->Negate    = i->Negate;
    229 }
    230 
    231 static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
    232 {
    233    o->File      = i->File;
    234    o->WriteMask = i->WriteMask;
    235    o->Indirect  = i->Indirect;
    236    o->Dimension = i->Dimension;
    237    o->Index     = i->Index;
    238 }
    239 
    240 static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
    241 {
    242    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
    243    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
    244 
    245    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
    246 
    247    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
    248    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
    249    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
    250 }
    251 
    252 static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
    253 {
    254    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
    255       memcpy(o, i, sizeof(*o));
    256    else
    257       copy_instruction(&o->FullInstruction, &i->FullInstruction);
    258 
    259 }
    260 
    261 static void liveness_mark_written(struct i915_optimize_context *ctx,
    262                                   struct i915_full_dst_register *dst_reg,
    263                                   int pos)
    264 {
    265    int dst_reg_index;
    266    if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
    267       dst_reg_index = dst_reg->Register.Index;
    268       assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
    269       /* dead -> live transition */
    270       if (ctx->first_write[dst_reg_index] != -1)
    271          ctx->first_write[dst_reg_index] = pos;
    272    }
    273 }
    274 
    275 static void liveness_mark_read(struct i915_optimize_context *ctx,
    276                                struct i915_full_src_register *src_reg,
    277                                int pos)
    278 {
    279    int src_reg_index;
    280    if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
    281       src_reg_index = src_reg->Register.Index;
    282       assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
    283       /* live -> dead transition */
    284       if (ctx->last_read[src_reg_index] != -1)
    285          ctx->last_read[src_reg_index] = pos;
    286    }
    287 }
    288 
    289 static void liveness_analysis(struct i915_optimize_context *ctx,
    290                               struct i915_token_list *tokens)
    291 {
    292    struct i915_full_dst_register *dst_reg;
    293    struct i915_full_src_register *src_reg;
    294    union i915_full_token *current;
    295    unsigned opcode;
    296    int num_dst, num_src;
    297    int i = 0;
    298 
    299    for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
    300    {
    301       ctx->first_write[i] = -1;
    302       ctx->last_read[i] = -1;
    303    }
    304 
    305    for(i = 0; i < tokens->NumTokens; i++)
    306    {
    307       current = &tokens->Tokens[i];
    308 
    309       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
    310          continue;
    311 
    312       opcode = current->FullInstruction.Instruction.Opcode;
    313       num_dst = op_num_dst(opcode);
    314 
    315       switch(num_dst)
    316       {
    317          case 1:
    318             dst_reg = &current->FullInstruction.Dst[0];
    319             liveness_mark_written(ctx, dst_reg, i);
    320          case 0:
    321             break;
    322          default:
    323             debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
    324             break;
    325       }
    326    }
    327 
    328    for(i = tokens->NumTokens - 1; i >= 0; i--)
    329    {
    330       current = &tokens->Tokens[i];
    331 
    332       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
    333          continue;
    334 
    335       opcode = current->FullInstruction.Instruction.Opcode;
    336       num_src = op_num_src(opcode);
    337 
    338       switch(num_src)
    339       {
    340          case 3:
    341             src_reg = &current->FullInstruction.Src[2];
    342             liveness_mark_read(ctx, src_reg, i);
    343          case 2:
    344             src_reg = &current->FullInstruction.Src[1];
    345             liveness_mark_read(ctx, src_reg, i);
    346          case 1:
    347             src_reg = &current->FullInstruction.Src[0];
    348             liveness_mark_read(ctx, src_reg, i);
    349          case 0:
    350             break;
    351          default:
    352             debug_printf("Op %d has %d src regs\n", opcode, num_src);
    353             break;
    354       }
    355    }
    356 }
    357 
    358 static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
    359 {
    360    int dst_reg_index = dst_reg->Register.Index;
    361    assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
    362    return (from >= ctx->last_read[dst_reg_index]);
    363 }
    364 
    365 /* Returns a mask with the components used for a texture access instruction */
    366 static unsigned i915_tex_mask(union i915_full_token *instr)
    367 {
    368    unsigned mask;
    369 
    370    /* Get the number of coords */
    371    mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
    372 
    373    /* Add the W component if projective */
    374    if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
    375       mask |= TGSI_WRITEMASK_W;
    376 
    377    return mask;
    378 }
    379 
    380 static boolean target_is_texture2d(uint tex)
    381 {
    382    switch (tex) {
    383    case TGSI_TEXTURE_2D:
    384    case TGSI_TEXTURE_RECT:
    385       return true;
    386    default:
    387       return false;
    388    }
    389 }
    390 
    391 
    392 /*
    393  * Optimize away useless indirect texture reads:
    394  *    MOV TEMP[0].xy, IN[0].xyyy
    395  *    TEX TEMP[1], TEMP[0], SAMP[0], 2D
    396  * into:
    397  *    TEX TEMP[1], IN[0], SAMP[0], 2D
    398  *
    399  * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
    400  */
    401 static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
    402                                              struct i915_token_list *tokens,
    403                                              int index)
    404 {
    405    union i915_full_token *current = &tokens->Tokens[index - 1];
    406    union i915_full_token *next = &tokens->Tokens[index];
    407 
    408    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    409         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    410         current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    411         op_is_texture(next->FullInstruction.Instruction.Opcode) &&
    412         target_is_texture2d(next->FullInstruction.Texture.Texture) &&
    413         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
    414         is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
    415         unused_from(ctx, &current->FullInstruction.Dst[0], index))
    416    {
    417       memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0], sizeof(struct i915_src_register));
    418       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    419    }
    420 }
    421 
    422 /*
    423  * Optimize away things like:
    424  *    MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
    425  *    MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
    426  * into:
    427  *    NOP
    428  *    MOV OUT[0].xyw, TEMP[1].xyww
    429  */
    430 static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
    431 {
    432    struct i915_full_src_register *src_reg1, *src_reg2;
    433    struct i915_full_dst_register *dst_reg1, *dst_reg2;
    434    unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
    435 
    436    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    437         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    438         current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    439         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    440         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
    441         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
    442         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
    443         !same_src_dst_reg(&current->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
    444    {
    445       src_reg1 = &current->FullInstruction.Src[0];
    446       dst_reg1 = &current->FullInstruction.Dst[0];
    447       src_reg2 = &next->FullInstruction.Src[0];
    448       dst_reg2 = &next->FullInstruction.Dst[0];
    449 
    450       /* Start with swizzles from the first mov */
    451       swizzle_x = src_reg1->Register.SwizzleX;
    452       swizzle_y = src_reg1->Register.SwizzleY;
    453       swizzle_z = src_reg1->Register.SwizzleZ;
    454       swizzle_w = src_reg1->Register.SwizzleW;
    455 
    456       /* Pile the second mov on top */
    457       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
    458          swizzle_x = src_reg2->Register.SwizzleX;
    459       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
    460          swizzle_y = src_reg2->Register.SwizzleY;
    461       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
    462          swizzle_z = src_reg2->Register.SwizzleZ;
    463       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
    464          swizzle_w = src_reg2->Register.SwizzleW;
    465 
    466       dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
    467       src_reg2->Register.SwizzleX = swizzle_x;
    468       src_reg2->Register.SwizzleY = swizzle_y;
    469       src_reg2->Register.SwizzleZ = swizzle_z;
    470       src_reg2->Register.SwizzleW = swizzle_w;
    471 
    472       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    473 
    474       return;
    475    }
    476 }
    477 
    478 /*
    479  * Optimize away things like:
    480  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
    481  *    MOV OUT[0].w, TEMP[2]
    482  * into:
    483  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
    484  * This is useful for optimizing texenv.
    485  */
    486 static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
    487 {
    488    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    489         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    490         op_commutes(current->FullInstruction.Instruction.Opcode) &&
    491         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
    492         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    493         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
    494         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
    495         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
    496         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
    497         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
    498         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
    499    {
    500       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    501 
    502       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
    503       set_neutral_element_swizzle(&current->FullInstruction.Src[0],
    504                                   next->FullInstruction.Dst[0].Register.WriteMask,
    505                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
    506 
    507       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
    508                                                            next->FullInstruction.Dst[0].Register.WriteMask;
    509       return;
    510    }
    511 
    512    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    513         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    514         op_commutes(current->FullInstruction.Instruction.Opcode) &&
    515         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
    516         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    517         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
    518         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
    519         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
    520         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
    521         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
    522         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
    523    {
    524       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    525 
    526       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
    527       set_neutral_element_swizzle(&current->FullInstruction.Src[1],
    528                                   next->FullInstruction.Dst[0].Register.WriteMask,
    529                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
    530 
    531       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
    532                                                            next->FullInstruction.Dst[0].Register.WriteMask;
    533       return;
    534    }
    535 }
    536 
    537 /*
    538  * Optimize away things like:
    539  *    MOV TEMP[0].xyz TEMP[0].xyzx
    540  * into:
    541  *    NOP
    542  */
    543 static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
    544 {
    545    union i915_full_token current;
    546    copy_token(&current , tgsi_current);
    547    if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    548         current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    549         op_has_dst(current.FullInstruction.Instruction.Opcode) &&
    550         !current.FullInstruction.Instruction.Saturate &&
    551         current.FullInstruction.Src[0].Register.Absolute == 0 &&
    552         current.FullInstruction.Src[0].Register.Negate == 0 &&
    553         is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
    554         same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
    555    {
    556       return TRUE;
    557    }
    558    return FALSE;
    559 }
    560 
    561 /*
    562  * Optimize away things like:
    563  *    *** TEMP[0], TEMP[1], TEMP[2]
    564  *    MOV OUT[0] TEMP[0]
    565  * into:
    566  *    *** OUT[0], TEMP[1], TEMP[2]
    567  */
    568 static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
    569                                                      struct i915_token_list *tokens,
    570                                                      int index)
    571 {
    572    union i915_full_token *current = &tokens->Tokens[index - 1];
    573    union i915_full_token *next = &tokens->Tokens[index];
    574 
    575    // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
    576    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    577         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    578         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    579         op_has_dst(current->FullInstruction.Instruction.Opcode) &&
    580         !next->FullInstruction.Instruction.Saturate &&
    581         next->FullInstruction.Src[0].Register.Absolute == 0 &&
    582         next->FullInstruction.Src[0].Register.Negate == 0 &&
    583         unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
    584         current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
    585         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
    586         current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
    587         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
    588    {
    589       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    590 
    591       current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
    592       return;
    593    }
    594 }
    595 
    596 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
    597 {
    598    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
    599    struct tgsi_parse_context parse;
    600    struct i915_optimize_context *ctx;
    601    int i = 0;
    602 
    603    ctx = malloc(sizeof(*ctx));
    604 
    605    out_tokens->NumTokens = 0;
    606 
    607    /* Count the tokens */
    608    tgsi_parse_init( &parse, tokens );
    609    while( !tgsi_parse_end_of_tokens( &parse ) ) {
    610       tgsi_parse_token( &parse );
    611       out_tokens->NumTokens++;
    612    }
    613    tgsi_parse_free (&parse);
    614 
    615    /* Allocate our tokens */
    616    out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
    617 
    618    tgsi_parse_init( &parse, tokens );
    619    while( !tgsi_parse_end_of_tokens( &parse ) ) {
    620       tgsi_parse_token( &parse );
    621 
    622       if (i915_fpc_useless_mov(&parse.FullToken)) {
    623          out_tokens->NumTokens--;
    624          continue;
    625       }
    626 
    627       copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
    628 
    629       i++;
    630    }
    631    tgsi_parse_free (&parse);
    632 
    633    liveness_analysis(ctx, out_tokens);
    634 
    635    i = 1;
    636    while( i < out_tokens->NumTokens) {
    637       i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
    638       i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
    639       i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
    640       i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
    641       i++;
    642    }
    643 
    644    free(ctx);
    645 
    646    return out_tokens;
    647 }
    648 
    649 void i915_optimize_free(struct i915_token_list *tokens)
    650 {
    651    free(tokens->Tokens);
    652    free(tokens);
    653 }
    654 
    655 
    656