Home | History | Annotate | Download | only in i915
      1 /**************************************************************************
      2  *
      3  * Copyright 2011 The Chromium OS authors.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 #include "i915_reg.h"
     29 #include "i915_context.h"
     30 #include "i915_fpc.h"
     31 
     32 #include "pipe/p_shader_tokens.h"
     33 #include "util/u_math.h"
     34 #include "util/u_memory.h"
     35 #include "util/u_string.h"
     36 #include "tgsi/tgsi_parse.h"
     37 #include "tgsi/tgsi_dump.h"
     38 #include "tgsi/tgsi_exec.h"
     39 
/*
 * Per-shader scratch state for the optimizer's liveness pass.
 * Both arrays are indexed by TGSI_FILE_TEMPORARY register index and hold
 * token positions; liveness_analysis() resets every slot to -1 before
 * scanning the token list.
 */
struct i915_optimize_context
{
   int first_write[TGSI_EXEC_NUM_TEMPS];  /* token position of a write to the temp, -1 if none recorded */
   int last_read[TGSI_EXEC_NUM_TEMPS];    /* token position of a read of the temp, -1 if none recorded */
};
     45 
     46 static boolean same_src_dst_reg(struct i915_full_src_register *s1, struct i915_full_dst_register *d1)
     47 {
     48    return (s1->Register.File == d1->Register.File &&
     49            s1->Register.Indirect == d1->Register.Indirect &&
     50            s1->Register.Dimension == d1->Register.Dimension &&
     51            s1->Register.Index == d1->Register.Index);
     52 }
     53 
     54 static boolean same_dst_reg(struct i915_full_dst_register *d1, struct i915_full_dst_register *d2)
     55 {
     56    return (d1->Register.File == d2->Register.File &&
     57            d1->Register.Indirect == d2->Register.Indirect &&
     58            d1->Register.Dimension == d2->Register.Dimension &&
     59            d1->Register.Index == d2->Register.Index);
     60 }
     61 
     62 static boolean same_src_reg(struct i915_full_src_register *d1, struct i915_full_src_register *d2)
     63 {
     64    return (d1->Register.File == d2->Register.File &&
     65            d1->Register.Indirect == d2->Register.Indirect &&
     66            d1->Register.Dimension == d2->Register.Dimension &&
     67            d1->Register.Index == d2->Register.Index &&
     68            d1->Register.Absolute == d2->Register.Absolute &&
     69            d1->Register.Negate == d2->Register.Negate);
     70 }
     71 
     72 static const struct {
     73    boolean is_texture;
     74    boolean commutes;
     75    unsigned neutral_element;
     76    unsigned num_dst;
     77    unsigned num_src;
     78 } op_table [TGSI_OPCODE_LAST] = {
     79    [ TGSI_OPCODE_ADD     ] = { false,   true,  TGSI_SWIZZLE_ZERO,  1,  2 },
     80    [ TGSI_OPCODE_CEIL    ] = { false,  false,                  0,  1,  1 },
     81    [ TGSI_OPCODE_CMP     ] = { false,  false,                  0,  1,  2 },
     82    [ TGSI_OPCODE_COS     ] = { false,  false,                  0,  1,  1 },
     83    [ TGSI_OPCODE_DDX     ] = { false,  false,                  0,  1,  0 },
     84    [ TGSI_OPCODE_DDY     ] = { false,  false,                  0,  1,  0 },
     85    [ TGSI_OPCODE_DP2     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
     86    [ TGSI_OPCODE_DP3     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
     87    [ TGSI_OPCODE_DP4     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
     88    [ TGSI_OPCODE_DPH     ] = { false,  false,                  0,  1,  2 },
     89    [ TGSI_OPCODE_DST     ] = { false,  false,                  0,  1,  2 },
     90    [ TGSI_OPCODE_END     ] = { false,  false,                  0,  0,  0 },
     91    [ TGSI_OPCODE_EX2     ] = { false,  false,                  0,  1,  1 },
     92    [ TGSI_OPCODE_FLR     ] = { false,  false,                  0,  1,  1 },
     93    [ TGSI_OPCODE_FRC     ] = { false,  false,                  0,  1,  1 },
     94    [ TGSI_OPCODE_KILL_IF ] = { false,  false,                  0,  0,  1 },
     95    [ TGSI_OPCODE_KILL    ] = { false,  false,                  0,  0,  0 },
     96    [ TGSI_OPCODE_LG2     ] = { false,  false,                  0,  1,  1 },
     97    [ TGSI_OPCODE_LIT     ] = { false,  false,                  0,  1,  1 },
     98    [ TGSI_OPCODE_LRP     ] = { false,  false,                  0,  1,  3 },
     99    [ TGSI_OPCODE_MAX     ] = { false,  false,                  0,  1,  2 },
    100    [ TGSI_OPCODE_MAD     ] = { false,  false,                  0,  1,  3 },
    101    [ TGSI_OPCODE_MIN     ] = { false,  false,                  0,  1,  2 },
    102    [ TGSI_OPCODE_MOV     ] = { false,  false,                  0,  1,  1 },
    103    [ TGSI_OPCODE_MUL     ] = { false,   true,   TGSI_SWIZZLE_ONE,  1,  2 },
    104    [ TGSI_OPCODE_NOP     ] = { false,  false,                  0,  0,  0 },
    105    [ TGSI_OPCODE_POW     ] = { false,  false,                  0,  1,  2 },
    106    [ TGSI_OPCODE_RCP     ] = { false,  false,                  0,  1,  1 },
    107    [ TGSI_OPCODE_RET     ] = { false,  false,                  0,  0,  0 },
    108    [ TGSI_OPCODE_RSQ     ] = { false,  false,                  0,  1,  1 },
    109    [ TGSI_OPCODE_SCS     ] = { false,  false,                  0,  1,  1 },
    110    [ TGSI_OPCODE_SEQ     ] = { false,  false,                  0,  1,  2 },
    111    [ TGSI_OPCODE_SGE     ] = { false,  false,                  0,  1,  2 },
    112    [ TGSI_OPCODE_SGT     ] = { false,  false,                  0,  1,  2 },
    113    [ TGSI_OPCODE_SIN     ] = { false,  false,                  0,  1,  1 },
    114    [ TGSI_OPCODE_SLE     ] = { false,  false,                  0,  1,  2 },
    115    [ TGSI_OPCODE_SLT     ] = { false,  false,                  0,  1,  2 },
    116    [ TGSI_OPCODE_SNE     ] = { false,  false,                  0,  1,  2 },
    117    [ TGSI_OPCODE_SSG     ] = { false,  false,                  0,  1,  1 },
    118    [ TGSI_OPCODE_TEX     ] = {  true,  false,                  0,  1,  2 },
    119    [ TGSI_OPCODE_TRUNC   ] = { false,  false,                  0,  1,  1 },
    120    [ TGSI_OPCODE_TXB     ] = {  true,  false,                  0,  1,  2 },
    121    [ TGSI_OPCODE_TXP     ] = {  true,  false,                  0,  1,  2 },
    122    [ TGSI_OPCODE_XPD     ] = { false,  false,                  0,  1,  2 },
    123 };
    124 
    125 static boolean op_has_dst(unsigned opcode)
    126 {
    127    return (op_table[opcode].num_dst > 0);
    128 }
    129 
    130 static int op_num_dst(unsigned opcode)
    131 {
    132    return op_table[opcode].num_dst;
    133 }
    134 
    135 static int op_num_src(unsigned opcode)
    136 {
    137    return op_table[opcode].num_src;
    138 }
    139 
    140 static boolean op_commutes(unsigned opcode)
    141 {
    142    return op_table[opcode].commutes;
    143 }
    144 
    145 static unsigned mask_for_unswizzled(int num_components)
    146 {
    147    unsigned mask = 0;
    148    switch(num_components)
    149    {
    150       case 4:
    151          mask |= TGSI_WRITEMASK_W;
    152       case 3:
    153          mask |= TGSI_WRITEMASK_Z;
    154       case 2:
    155          mask |= TGSI_WRITEMASK_Y;
    156       case 1:
    157          mask |= TGSI_WRITEMASK_X;
    158    }
    159    return mask;
    160 }
    161 
    162 static boolean is_unswizzled(struct i915_full_src_register *r,
    163                              unsigned write_mask)
    164 {
    165    if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
    166       return FALSE;
    167    if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
    168       return FALSE;
    169    if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
    170       return FALSE;
    171    if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
    172       return FALSE;
    173    return TRUE;
    174 }
    175 
    176 static boolean op_is_texture(unsigned opcode)
    177 {
    178    return op_table[opcode].is_texture;
    179 }
    180 
    181 static unsigned op_neutral_element(unsigned opcode)
    182 {
    183    unsigned ne = op_table[opcode].neutral_element;
    184    if (!ne) {
    185       debug_printf("No neutral element for opcode %d\n",opcode);
    186       ne = TGSI_SWIZZLE_ZERO;
    187    }
    188    return ne;
    189 }
    190 
    191 /*
    192  * Sets the swizzle to the neutral element for the operation for the bits
    193  * of writemask which are set, swizzle to identity otherwise.
    194  */
    195 static void set_neutral_element_swizzle(struct i915_full_src_register *r,
    196                                         unsigned write_mask,
    197                                         unsigned neutral)
    198 {
    199    if ( write_mask & TGSI_WRITEMASK_X )
    200       r->Register.SwizzleX = neutral;
    201    else
    202       r->Register.SwizzleX = TGSI_SWIZZLE_X;
    203 
    204    if ( write_mask & TGSI_WRITEMASK_Y )
    205       r->Register.SwizzleY = neutral;
    206    else
    207       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
    208 
    209    if ( write_mask & TGSI_WRITEMASK_Z )
    210       r->Register.SwizzleZ = neutral;
    211    else
    212       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
    213 
    214    if ( write_mask & TGSI_WRITEMASK_W )
    215       r->Register.SwizzleW = neutral;
    216    else
    217       r->Register.SwizzleW = TGSI_SWIZZLE_W;
    218 }
    219 
    220 static void copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
    221 {
    222    o->File      = i->File;
    223    o->Indirect  = i->Indirect;
    224    o->Dimension = i->Dimension;
    225    o->Index     = i->Index;
    226    o->SwizzleX  = i->SwizzleX;
    227    o->SwizzleY  = i->SwizzleY;
    228    o->SwizzleZ  = i->SwizzleZ;
    229    o->SwizzleW  = i->SwizzleW;
    230    o->Absolute  = i->Absolute;
    231    o->Negate    = i->Negate;
    232 }
    233 
    234 static void copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
    235 {
    236    o->File      = i->File;
    237    o->WriteMask = i->WriteMask;
    238    o->Indirect  = i->Indirect;
    239    o->Dimension = i->Dimension;
    240    o->Index     = i->Index;
    241 }
    242 
    243 static void copy_instruction(struct i915_full_instruction *o, const struct tgsi_full_instruction *i)
    244 {
    245    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
    246    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
    247 
    248    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
    249 
    250    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
    251    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
    252    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
    253 }
    254 
    255 static void copy_token(union i915_full_token *o, union tgsi_full_token *i)
    256 {
    257    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
    258       memcpy(o, i, sizeof(*o));
    259    else
    260       copy_instruction(&o->FullInstruction, &i->FullInstruction);
    261 
    262 }
    263 
    264 static void liveness_mark_written(struct i915_optimize_context *ctx,
    265                                   struct i915_full_dst_register *dst_reg,
    266                                   int pos)
    267 {
    268    int dst_reg_index;
    269    if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
    270       dst_reg_index = dst_reg->Register.Index;
    271       assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
    272       /* dead -> live transition */
    273       if (ctx->first_write[dst_reg_index] != -1)
    274          ctx->first_write[dst_reg_index] = pos;
    275    }
    276 }
    277 
    278 static void liveness_mark_read(struct i915_optimize_context *ctx,
    279                                struct i915_full_src_register *src_reg,
    280                                int pos)
    281 {
    282    int src_reg_index;
    283    if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
    284       src_reg_index = src_reg->Register.Index;
    285       assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
    286       /* live -> dead transition */
    287       if (ctx->last_read[src_reg_index] != -1)
    288          ctx->last_read[src_reg_index] = pos;
    289    }
    290 }
    291 
    292 static void liveness_analysis(struct i915_optimize_context *ctx,
    293                               struct i915_token_list *tokens)
    294 {
    295    struct i915_full_dst_register *dst_reg;
    296    struct i915_full_src_register *src_reg;
    297    union i915_full_token *current;
    298    unsigned opcode;
    299    int num_dst, num_src;
    300    int i = 0;
    301 
    302    for(i = 0; i < TGSI_EXEC_NUM_TEMPS; i++)
    303    {
    304       ctx->first_write[i] = -1;
    305       ctx->last_read[i] = -1;
    306    }
    307 
    308    for(i = 0; i < tokens->NumTokens; i++)
    309    {
    310       current = &tokens->Tokens[i];
    311 
    312       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
    313          continue;
    314 
    315       opcode = current->FullInstruction.Instruction.Opcode;
    316       num_dst = op_num_dst(opcode);
    317 
    318       switch(num_dst)
    319       {
    320          case 1:
    321             dst_reg = &current->FullInstruction.Dst[0];
    322             liveness_mark_written(ctx, dst_reg, i);
    323          case 0:
    324             break;
    325          default:
    326             debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
    327             break;
    328       }
    329    }
    330 
    331    for(i = tokens->NumTokens - 1; i >= 0; i--)
    332    {
    333       current = &tokens->Tokens[i];
    334 
    335       if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
    336          continue;
    337 
    338       opcode = current->FullInstruction.Instruction.Opcode;
    339       num_src = op_num_src(opcode);
    340 
    341       switch(num_src)
    342       {
    343          case 3:
    344             src_reg = &current->FullInstruction.Src[2];
    345             liveness_mark_read(ctx, src_reg, i);
    346          case 2:
    347             src_reg = &current->FullInstruction.Src[1];
    348             liveness_mark_read(ctx, src_reg, i);
    349          case 1:
    350             src_reg = &current->FullInstruction.Src[0];
    351             liveness_mark_read(ctx, src_reg, i);
    352          case 0:
    353             break;
    354          default:
    355             debug_printf("Op %d has %d src regs\n", opcode, num_src);
    356             break;
    357       }
    358    }
    359 }
    360 
    361 static int unused_from(struct i915_optimize_context *ctx, struct i915_full_dst_register *dst_reg, int from)
    362 {
    363    int dst_reg_index = dst_reg->Register.Index;
    364    assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
    365    return (from >= ctx->last_read[dst_reg_index]);
    366 }
    367 
    368 /* Returns a mask with the components used for a texture access instruction */
    369 static unsigned i915_tex_mask(union i915_full_token *instr)
    370 {
    371    unsigned mask;
    372 
    373    /* Get the number of coords */
    374    mask = mask_for_unswizzled(i915_num_coords(instr->FullInstruction.Texture.Texture));
    375 
    376    /* Add the W component if projective */
    377    if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
    378       mask |= TGSI_WRITEMASK_W;
    379 
    380    return mask;
    381 }
    382 
    383 static boolean target_is_texture2d(uint tex)
    384 {
    385    switch (tex) {
    386    case TGSI_TEXTURE_2D:
    387    case TGSI_TEXTURE_RECT:
    388       return true;
    389    default:
    390       return false;
    391    }
    392 }
    393 
    394 
    395 /*
    396  * Optimize away useless indirect texture reads:
    397  *    MOV TEMP[0].xy, IN[0].xyyy
    398  *    TEX TEMP[1], TEMP[0], SAMP[0], 2D
    399  * into:
    400  *    TEX TEMP[1], IN[0], SAMP[0], 2D
    401  *
    402  * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
    403  */
    404 static void i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
    405                                              struct i915_token_list *tokens,
    406                                              int index)
    407 {
    408    union i915_full_token *current = &tokens->Tokens[index - 1];
    409    union i915_full_token *next = &tokens->Tokens[index];
    410 
    411    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    412         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    413         current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    414         op_is_texture(next->FullInstruction.Instruction.Opcode) &&
    415         target_is_texture2d(next->FullInstruction.Texture.Texture) &&
    416         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
    417         is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
    418         unused_from(ctx, &current->FullInstruction.Dst[0], index))
    419    {
    420       memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0], sizeof(struct i915_src_register));
    421       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    422    }
    423 }
    424 
    425 /*
    426  * Optimize away things like:
    427  *    MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
    428  *    MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
    429  * into:
    430  *    NOP
    431  *    MOV OUT[0].xyw, TEMP[1].xyww
    432  */
    433 static void i915_fpc_optimize_mov_after_mov(union i915_full_token *current, union i915_full_token *next)
    434 {
    435    struct i915_full_src_register *src_reg1, *src_reg2;
    436    struct i915_full_dst_register *dst_reg1, *dst_reg2;
    437    unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
    438 
    439    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    440         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    441         current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    442         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    443         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
    444         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
    445         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
    446         !same_src_dst_reg(&current->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
    447    {
    448       src_reg1 = &current->FullInstruction.Src[0];
    449       dst_reg1 = &current->FullInstruction.Dst[0];
    450       src_reg2 = &next->FullInstruction.Src[0];
    451       dst_reg2 = &next->FullInstruction.Dst[0];
    452 
    453       /* Start with swizzles from the first mov */
    454       swizzle_x = src_reg1->Register.SwizzleX;
    455       swizzle_y = src_reg1->Register.SwizzleY;
    456       swizzle_z = src_reg1->Register.SwizzleZ;
    457       swizzle_w = src_reg1->Register.SwizzleW;
    458 
    459       /* Pile the second mov on top */
    460       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
    461          swizzle_x = src_reg2->Register.SwizzleX;
    462       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
    463          swizzle_y = src_reg2->Register.SwizzleY;
    464       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
    465          swizzle_z = src_reg2->Register.SwizzleZ;
    466       if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
    467          swizzle_w = src_reg2->Register.SwizzleW;
    468 
    469       dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
    470       src_reg2->Register.SwizzleX = swizzle_x;
    471       src_reg2->Register.SwizzleY = swizzle_y;
    472       src_reg2->Register.SwizzleZ = swizzle_z;
    473       src_reg2->Register.SwizzleW = swizzle_w;
    474 
    475       current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    476 
    477       return;
    478    }
    479 }
    480 
    481 /*
    482  * Optimize away things like:
    483  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
    484  *    MOV OUT[0].w, TEMP[2]
    485  * into:
    486  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
    487  * This is useful for optimizing texenv.
    488  */
    489 static void i915_fpc_optimize_mov_after_alu(union i915_full_token *current, union i915_full_token *next)
    490 {
    491    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    492         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    493         op_commutes(current->FullInstruction.Instruction.Opcode) &&
    494         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
    495         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    496         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
    497         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
    498         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
    499         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
    500         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
    501         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
    502    {
    503       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    504 
    505       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
    506       set_neutral_element_swizzle(&current->FullInstruction.Src[0],
    507                                   next->FullInstruction.Dst[0].Register.WriteMask,
    508                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
    509 
    510       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
    511                                                            next->FullInstruction.Dst[0].Register.WriteMask;
    512       return;
    513    }
    514 
    515    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    516         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    517         op_commutes(current->FullInstruction.Instruction.Opcode) &&
    518         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
    519         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    520         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
    521         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
    522         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
    523         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
    524         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
    525         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
    526    {
    527       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    528 
    529       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
    530       set_neutral_element_swizzle(&current->FullInstruction.Src[1],
    531                                   next->FullInstruction.Dst[0].Register.WriteMask,
    532                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
    533 
    534       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
    535                                                            next->FullInstruction.Dst[0].Register.WriteMask;
    536       return;
    537    }
    538 }
    539 
    540 /*
    541  * Optimize away things like:
    542  *    MOV TEMP[0].xyz TEMP[0].xyzx
    543  * into:
    544  *    NOP
    545  */
    546 static boolean i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
    547 {
    548    union i915_full_token current;
    549    copy_token(&current , tgsi_current);
    550    if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    551         current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    552         op_has_dst(current.FullInstruction.Instruction.Opcode) &&
    553         !current.FullInstruction.Instruction.Saturate &&
    554         current.FullInstruction.Src[0].Register.Absolute == 0 &&
    555         current.FullInstruction.Src[0].Register.Negate == 0 &&
    556         is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
    557         same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
    558    {
    559       return TRUE;
    560    }
    561    return FALSE;
    562 }
    563 
    564 /*
    565  * Optimize away things like:
    566  *    *** TEMP[0], TEMP[1], TEMP[2]
    567  *    MOV OUT[0] TEMP[0]
    568  * into:
    569  *    *** OUT[0], TEMP[1], TEMP[2]
    570  */
    571 static void i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
    572                                                      struct i915_token_list *tokens,
    573                                                      int index)
    574 {
    575    union i915_full_token *current = &tokens->Tokens[index - 1];
    576    union i915_full_token *next = &tokens->Tokens[index];
    577 
    578    // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
    579    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    580         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    581         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    582         op_has_dst(current->FullInstruction.Instruction.Opcode) &&
    583         !next->FullInstruction.Instruction.Saturate &&
    584         next->FullInstruction.Src[0].Register.Absolute == 0 &&
    585         next->FullInstruction.Src[0].Register.Negate == 0 &&
    586         unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
    587         current->FullInstruction.Dst[0].Register.WriteMask == TGSI_WRITEMASK_XYZW &&
    588         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
    589         current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
    590         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
    591    {
    592       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    593 
    594       current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
    595       return;
    596    }
    597 }
    598 
    599 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
    600 {
    601    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
    602    struct tgsi_parse_context parse;
    603    struct i915_optimize_context *ctx;
    604    int i = 0;
    605 
    606    ctx = malloc(sizeof(*ctx));
    607 
    608    out_tokens->NumTokens = 0;
    609 
    610    /* Count the tokens */
    611    tgsi_parse_init( &parse, tokens );
    612    while( !tgsi_parse_end_of_tokens( &parse ) ) {
    613       tgsi_parse_token( &parse );
    614       out_tokens->NumTokens++;
    615    }
    616    tgsi_parse_free (&parse);
    617 
    618    /* Allocate our tokens */
    619    out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
    620 
    621    tgsi_parse_init( &parse, tokens );
    622    while( !tgsi_parse_end_of_tokens( &parse ) ) {
    623       tgsi_parse_token( &parse );
    624 
    625       if (i915_fpc_useless_mov(&parse.FullToken)) {
    626          out_tokens->NumTokens--;
    627          continue;
    628       }
    629 
    630       copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
    631 
    632       i++;
    633    }
    634    tgsi_parse_free (&parse);
    635 
    636    liveness_analysis(ctx, out_tokens);
    637 
    638    i = 1;
    639    while( i < out_tokens->NumTokens) {
    640       i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
    641       i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
    642       i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
    643       i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
    644       i++;
    645    }
    646 
    647    free(ctx);
    648 
    649    return out_tokens;
    650 }
    651 
    652 void i915_optimize_free(struct i915_token_list *tokens)
    653 {
    654    free(tokens->Tokens);
    655    free(tokens);
    656 }
    657 
    658 
    659