Home | History | Annotate | Download | only in i915
      1 /**************************************************************************
      2  *
      3  * Copyright 2011 The Chromium OS authors.
      4  * All Rights Reserved.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the
      8  * "Software"), to deal in the Software without restriction, including
      9  * without limitation the rights to use, copy, modify, merge, publish,
     10  * distribute, sub license, and/or sell copies of the Software, and to
     11  * permit persons to whom the Software is furnished to do so, subject to
     12  * the following conditions:
     13  *
     14  * The above copyright notice and this permission notice (including the
     15  * next paragraph) shall be included in all copies or substantial portions
     16  * of the Software.
     17  *
     18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
     19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
     21  * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
     22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     25  *
     26  **************************************************************************/
     27 
     28 #include "i915_reg.h"
     29 #include "i915_context.h"
     30 #include "i915_fpc.h"
     31 
     32 #include "pipe/p_shader_tokens.h"
     33 #include "util/u_math.h"
     34 #include "util/u_memory.h"
     35 #include "util/u_string.h"
     36 #include "tgsi/tgsi_parse.h"
     37 #include "tgsi/tgsi_dump.h"
     38 
     39 static boolean same_src_dst_reg(struct i915_full_src_register* s1, struct i915_full_dst_register* d1)
     40 {
     41    return (s1->Register.File == d1->Register.File &&
     42            s1->Register.Indirect == d1->Register.Indirect &&
     43            s1->Register.Dimension == d1->Register.Dimension &&
     44            s1->Register.Index == d1->Register.Index);
     45 }
     46 
     47 static boolean same_dst_reg(struct i915_full_dst_register* d1, struct i915_full_dst_register* d2)
     48 {
     49    return (d1->Register.File == d2->Register.File &&
     50            d1->Register.Indirect == d2->Register.Indirect &&
     51            d1->Register.Dimension == d2->Register.Dimension &&
     52            d1->Register.Index == d2->Register.Index);
     53 }
     54 
     55 static boolean same_src_reg(struct i915_full_src_register* d1, struct i915_full_src_register* d2)
     56 {
     57    return (d1->Register.File == d2->Register.File &&
     58            d1->Register.Indirect == d2->Register.Indirect &&
     59            d1->Register.Dimension == d2->Register.Dimension &&
     60            d1->Register.Index == d2->Register.Index &&
     61            d1->Register.Absolute == d2->Register.Absolute &&
     62            d1->Register.Negate == d2->Register.Negate);
     63 }
     64 
     65 static boolean has_destination(unsigned opcode)
     66 {
     67    return (opcode != TGSI_OPCODE_NOP &&
     68            opcode != TGSI_OPCODE_KIL &&
     69            opcode != TGSI_OPCODE_KILP &&
     70            opcode != TGSI_OPCODE_END &&
     71            opcode != TGSI_OPCODE_RET);
     72 }
     73 
     74 static boolean is_unswizzled(struct i915_full_src_register* r,
     75                              unsigned write_mask)
     76 {
     77    if ( write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
     78       return FALSE;
     79    if ( write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
     80       return FALSE;
     81    if ( write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
     82       return FALSE;
     83    if ( write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
     84       return FALSE;
     85    return TRUE;
     86 }
     87 
     88 static boolean op_commutes(unsigned opcode)
     89 {
     90    switch(opcode)
     91    {
     92       case TGSI_OPCODE_ADD:
     93       case TGSI_OPCODE_MUL:
     94       case TGSI_OPCODE_DP2:
     95       case TGSI_OPCODE_DP3:
     96       case TGSI_OPCODE_DP4:
     97          return TRUE;
     98    }
     99    return FALSE;
    100 }
    101 
    102 static unsigned op_neutral_element(unsigned opcode)
    103 {
    104    switch(opcode)
    105    {
    106       case TGSI_OPCODE_ADD:
    107          return TGSI_SWIZZLE_ZERO;
    108       case TGSI_OPCODE_MUL:
    109       case TGSI_OPCODE_DP2:
    110       case TGSI_OPCODE_DP3:
    111       case TGSI_OPCODE_DP4:
    112          return TGSI_SWIZZLE_ONE;
    113    }
    114 
    115    debug_printf("Unknown opcode %d\n",opcode);
    116    return TGSI_SWIZZLE_ZERO;
    117 }
    118 
    119 /*
    120  * Sets the swizzle to the neutral element for the operation for the bits
    121  * of writemask which are set, swizzle to identity otherwise.
    122  */
    123 static void set_neutral_element_swizzle(struct i915_full_src_register* r,
    124                                         unsigned write_mask,
    125                                         unsigned neutral)
    126 {
    127    if ( write_mask & TGSI_WRITEMASK_X )
    128       r->Register.SwizzleX = neutral;
    129    else
    130       r->Register.SwizzleX = TGSI_SWIZZLE_X;
    131 
    132    if ( write_mask & TGSI_WRITEMASK_Y )
    133       r->Register.SwizzleY = neutral;
    134    else
    135       r->Register.SwizzleY = TGSI_SWIZZLE_Y;
    136 
    137    if ( write_mask & TGSI_WRITEMASK_Z )
    138       r->Register.SwizzleZ = neutral;
    139    else
    140       r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
    141 
    142    if ( write_mask & TGSI_WRITEMASK_W )
    143       r->Register.SwizzleW = neutral;
    144    else
    145       r->Register.SwizzleW = TGSI_SWIZZLE_W;
    146 }
    147 
    148 static void copy_src_reg(struct i915_src_register* o, const struct tgsi_src_register* i)
    149 {
    150    o->File      = i->File;
    151    o->Indirect  = i->Indirect;
    152    o->Dimension = i->Dimension;
    153    o->Index     = i->Index;
    154    o->SwizzleX  = i->SwizzleX;
    155    o->SwizzleY  = i->SwizzleY;
    156    o->SwizzleZ  = i->SwizzleZ;
    157    o->SwizzleW  = i->SwizzleW;
    158    o->Absolute  = i->Absolute;
    159    o->Negate    = i->Negate;
    160 }
    161 
    162 static void copy_dst_reg(struct i915_dst_register* o, const struct tgsi_dst_register* i)
    163 {
    164    o->File      = i->File;
    165    o->WriteMask = i->WriteMask;
    166    o->Indirect  = i->Indirect;
    167    o->Dimension = i->Dimension;
    168    o->Index     = i->Index;
    169 }
    170 
    171 static void copy_instruction(struct i915_full_instruction* o, const struct tgsi_full_instruction* i)
    172 {
    173    memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
    174    memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
    175 
    176    copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
    177 
    178    copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
    179    copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
    180    copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
    181 }
    182 
    183 static void copy_token(union i915_full_token* o, union tgsi_full_token* i)
    184 {
    185    if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
    186       memcpy(o, i, sizeof(*o));
    187    else
    188       copy_instruction(&o->FullInstruction, &i->FullInstruction);
    189 
    190 }
    191 
    192 /*
    193  * Optimize away things like:
    194  *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
    195  *    MOV OUT[0].w, TEMP[2]
    196  * into:
    197  *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
    198  * This is useful for optimizing texenv.
    199  */
    200 static void i915_fpc_optimize_mov_after_alu(union i915_full_token* current, union i915_full_token* next)
    201 {
    202    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    203         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    204         op_commutes(current->FullInstruction.Instruction.Opcode) &&
    205         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
    206         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    207         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
    208         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[1]) &&
    209         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
    210         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
    211         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
    212         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
    213    {
    214       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    215 
    216       set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
    217       set_neutral_element_swizzle(&current->FullInstruction.Src[0],
    218                                   next->FullInstruction.Dst[0].Register.WriteMask,
    219                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
    220 
    221       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
    222                                                            next->FullInstruction.Dst[0].Register.WriteMask;
    223       return;
    224    }
    225 
    226    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    227         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    228         op_commutes(current->FullInstruction.Instruction.Opcode) &&
    229         current->FullInstruction.Instruction.Saturate == next->FullInstruction.Instruction.Saturate &&
    230         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    231         same_dst_reg(&next->FullInstruction.Dst[0], &current->FullInstruction.Dst[0]) &&
    232         same_src_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0]) &&
    233         !same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) &&
    234         is_unswizzled(&current->FullInstruction.Src[0], current->FullInstruction.Dst[0].Register.WriteMask) &&
    235         is_unswizzled(&current->FullInstruction.Src[1], current->FullInstruction.Dst[0].Register.WriteMask) &&
    236         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) )
    237    {
    238       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    239 
    240       set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
    241       set_neutral_element_swizzle(&current->FullInstruction.Src[1],
    242                                   next->FullInstruction.Dst[0].Register.WriteMask,
    243                                   op_neutral_element(current->FullInstruction.Instruction.Opcode));
    244 
    245       current->FullInstruction.Dst[0].Register.WriteMask = current->FullInstruction.Dst[0].Register.WriteMask |
    246                                                            next->FullInstruction.Dst[0].Register.WriteMask;
    247       return;
    248    }
    249 }
    250 
    251 /*
    252  * Optimize away things like:
    253  *    MOV TEMP[0].xyz TEMP[0].xyzx
    254  * into:
    255  *    NOP
    256  */
    257 static boolean i915_fpc_useless_mov(union tgsi_full_token* tgsi_current)
    258 {
    259    union i915_full_token current;
    260    copy_token(&current , tgsi_current);
    261    if ( current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    262         current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    263         has_destination(current.FullInstruction.Instruction.Opcode) &&
    264         current.FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
    265         current.FullInstruction.Src[0].Register.Absolute == 0 &&
    266         current.FullInstruction.Src[0].Register.Negate == 0 &&
    267         is_unswizzled(&current.FullInstruction.Src[0], current.FullInstruction.Dst[0].Register.WriteMask) &&
    268         same_src_dst_reg(&current.FullInstruction.Src[0], &current.FullInstruction.Dst[0]) )
    269    {
    270       return TRUE;
    271    }
    272    return FALSE;
    273 }
    274 
    275 /*
    276  * Optimize away things like:
    277  *    *** TEMP[0], TEMP[1], TEMP[2]
    278  *    MOV OUT[0] TEMP[0]
    279  * into:
    280  *    *** OUT[0], TEMP[1], TEMP[2]
    281  */
    282 static void i915_fpc_optimize_useless_mov_after_inst(union i915_full_token* current, union i915_full_token* next)
    283 {
    284    if ( current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    285         next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION  &&
    286         next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
    287         has_destination(current->FullInstruction.Instruction.Opcode) &&
    288         next->FullInstruction.Instruction.Saturate == TGSI_SAT_NONE &&
    289         next->FullInstruction.Src[0].Register.Absolute == 0 &&
    290         next->FullInstruction.Src[0].Register.Negate == 0 &&
    291         next->FullInstruction.Dst[0].Register.File == TGSI_FILE_OUTPUT &&
    292         is_unswizzled(&next->FullInstruction.Src[0], next->FullInstruction.Dst[0].Register.WriteMask) &&
    293         current->FullInstruction.Dst[0].Register.WriteMask == next->FullInstruction.Dst[0].Register.WriteMask &&
    294         same_src_dst_reg(&next->FullInstruction.Src[0], &current->FullInstruction.Dst[0]) )
    295    {
    296       next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
    297 
    298       current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
    299       return;
    300    }
    301 }
    302 
    303 struct i915_token_list* i915_optimize(const struct tgsi_token *tokens)
    304 {
    305    struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
    306    struct tgsi_parse_context parse;
    307    int i = 0;
    308 
    309    out_tokens->NumTokens = 0;
    310 
    311    /* Count the tokens */
    312    tgsi_parse_init( &parse, tokens );
    313    while( !tgsi_parse_end_of_tokens( &parse ) ) {
    314       tgsi_parse_token( &parse );
    315       out_tokens->NumTokens++;
    316    }
    317    tgsi_parse_free (&parse);
    318 
    319    /* Allocate our tokens */
    320    out_tokens->Tokens = MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
    321 
    322    tgsi_parse_init( &parse, tokens );
    323    while( !tgsi_parse_end_of_tokens( &parse ) ) {
    324       tgsi_parse_token( &parse );
    325 
    326       if (i915_fpc_useless_mov(&parse.FullToken)) {
    327          out_tokens->NumTokens--;
    328          continue;
    329       }
    330 
    331       copy_token(&out_tokens->Tokens[i] , &parse.FullToken);
    332 
    333       if (i > 0) {
    334          i915_fpc_optimize_useless_mov_after_inst(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
    335          i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
    336       }
    337       i++;
    338    }
    339    tgsi_parse_free (&parse);
    340 
    341    return out_tokens;
    342 }
    343 
    344 void i915_optimize_free(struct i915_token_list* tokens)
    345 {
    346    free(tokens->Tokens);
    347    free(tokens);
    348 }
    349 
    350 
    351