Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2013 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file opt_vectorize.cpp
     26  *
     27  * Combines scalar assignments of the same expression (modulo swizzle) to
     28  * multiple channels of the same variable into a single vectorized expression
     29  * and assignment.
     30  *
     31  * Many generated shaders contain scalarized code. That is, they contain
     32  *
     33  * r1.x = log2(v0.x);
     34  * r1.y = log2(v0.y);
     35  * r1.z = log2(v0.z);
     36  *
     37  * rather than
     38  *
     39  * r1.xyz = log2(v0.xyz);
     40  *
     41  * We look for consecutive assignments of the same expression (modulo swizzle)
     42  * to each channel of the same variable.
     43  *
     44  * For instance, we want to convert these three scalar operations
     45  *
     46  * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
     47  * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
     48  * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
     49  *
     50  * into a single vector operation
     51  *
     52  * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
     53  */
     54 
     55 #include "ir.h"
     56 #include "ir_visitor.h"
     57 #include "ir_optimization.h"
     58 #include "compiler/glsl_types.h"
     59 #include "program/prog_instruction.h"
     60 
     61 namespace {
     62 
     63 class ir_vectorize_visitor : public ir_hierarchical_visitor {
     64 public:
     65    void clear()
     66    {
     67       assignment[0] = NULL;
     68       assignment[1] = NULL;
     69       assignment[2] = NULL;
     70       assignment[3] = NULL;
     71       current_assignment = NULL;
     72       last_assignment = NULL;
     73       channels = 0;
     74       has_swizzle = false;
     75    }
     76 
     77    ir_vectorize_visitor()
     78    {
     79       clear();
     80       progress = false;
     81    }
     82 
     83    virtual ir_visitor_status visit_enter(ir_assignment *);
     84    virtual ir_visitor_status visit_enter(ir_swizzle *);
     85    virtual ir_visitor_status visit_enter(ir_dereference_array *);
     86    virtual ir_visitor_status visit_enter(ir_expression *);
     87    virtual ir_visitor_status visit_enter(ir_if *);
     88    virtual ir_visitor_status visit_enter(ir_loop *);
     89    virtual ir_visitor_status visit_enter(ir_texture *);
     90 
     91    virtual ir_visitor_status visit_leave(ir_assignment *);
     92 
     93    void try_vectorize();
     94 
     95    ir_assignment *assignment[4];
     96    ir_assignment *current_assignment, *last_assignment;
     97    unsigned channels;
     98    bool has_swizzle;
     99 
    100    bool progress;
    101 };
    102 
    103 } /* unnamed namespace */
    104 
    105 /**
    106  * Rewrites the swizzles and types of a right-hand side of an assignment.
    107  *
    108  * From the example above, this function would be called (by visit_tree()) on
    109  * the nodes of the tree (expression float log2 (swiz z   (var_ref v0))),
    110  * rewriting it into     (expression vec3  log2 (swiz xyz (var_ref v0))).
    111  *
    112  * The function operates on ir_expressions (and its operands) and ir_swizzles.
    113  * For expressions it sets a new type and swizzles any non-expression and non-
    114  * swizzle scalar operands into appropriately sized vector arguments. For
    115  * example, if combining
    116  *
    117  * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
    118  * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
    119  *
    120  * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
    121  * (var_ref v1) such that the final result was
    122  *
    123  * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
    124  *                                              (swiz xx (var_ref v1))))
    125  *
    126  * For swizzles, it sets a new type, and if the variable being swizzled is a
    127  * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
    128  * data parameter. If the swizzled variable is scalar, then the swizzle was
    129  * added by an earlier call to rewrite_swizzle() on an expression, so the
    130  * mask should not be modified.
    131  */
    132 static void
    133 rewrite_swizzle(ir_instruction *ir, void *data)
    134 {
    135    ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
    136 
    137    switch (ir->ir_type) {
    138    case ir_type_swizzle: {
    139       ir_swizzle *swz = (ir_swizzle *)ir;
    140       if (swz->val->type->is_vector()) {
    141          swz->mask = *mask;
    142       }
    143       swz->type = glsl_type::get_instance(swz->type->base_type,
    144                                           mask->num_components, 1);
    145       break;
    146    }
    147    case ir_type_expression: {
    148       ir_expression *expr = (ir_expression *)ir;
    149       expr->type = glsl_type::get_instance(expr->type->base_type,
    150                                            mask->num_components, 1);
    151       for (unsigned i = 0; i < 4; i++) {
    152          if (expr->operands[i]) {
    153             ir_rvalue *rval = expr->operands[i]->as_rvalue();
    154             if (rval && rval->type->is_scalar() &&
    155                 !rval->as_expression() && !rval->as_swizzle()) {
    156                expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0,
    157                                                       mask->num_components);
    158             }
    159          }
    160       }
    161       break;
    162    }
    163    default:
    164       break;
    165    }
    166 }
    167 
    168 /**
    169  * Attempt to vectorize the previously saved assignments, and clear them from
    170  * consideration.
    171  *
    172  * If the assignments are able to be combined, it modifies in-place the last
    173  * assignment seen to be an equivalent vector form of the scalar assignments.
    174  * It then removes the other now obsolete scalar assignments.
    175  */
    176 void
    177 ir_vectorize_visitor::try_vectorize()
    178 {
    179    if (this->last_assignment && this->channels > 1) {
    180       ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0};
    181 
    182       this->last_assignment->write_mask = 0;
    183 
    184       for (unsigned i = 0, j = 0; i < 4; i++) {
    185          if (this->assignment[i]) {
    186             this->last_assignment->write_mask |= 1 << i;
    187 
    188             if (this->assignment[i] != this->last_assignment) {
    189                this->assignment[i]->remove();
    190             }
    191 
    192             switch (j) {
    193             case 0: mask.x = i; break;
    194             case 1: mask.y = i; break;
    195             case 2: mask.z = i; break;
    196             case 3: mask.w = i; break;
    197             }
    198 
    199             j++;
    200          }
    201       }
    202 
    203       visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
    204 
    205       this->progress = true;
    206    }
    207    clear();
    208 }
    209 
    210 /**
    211  * Returns whether the write mask is a single channel.
    212  */
    213 static bool
    214 single_channel_write_mask(unsigned write_mask)
    215 {
    216    return write_mask != 0 && (write_mask & (write_mask - 1)) == 0;
    217 }
    218 
    219 /**
    220  * Translates single-channeled write mask to single-channeled swizzle.
    221  */
    222 static unsigned
    223 write_mask_to_swizzle(unsigned write_mask)
    224 {
    225    switch (write_mask) {
    226    case WRITEMASK_X: return SWIZZLE_X;
    227    case WRITEMASK_Y: return SWIZZLE_Y;
    228    case WRITEMASK_Z: return SWIZZLE_Z;
    229    case WRITEMASK_W: return SWIZZLE_W;
    230    }
    231    unreachable("not reached");
    232 }
    233 
    234 /**
    235  * Returns whether a single-channeled write mask matches a swizzle.
    236  */
    237 static bool
    238 write_mask_matches_swizzle(unsigned write_mask,
    239                            const ir_swizzle *swz)
    240 {
    241    return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
    242            (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
    243            (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
    244            (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
    245 }
    246 
    247 /**
    248  * Upon entering an ir_assignment, attempt to vectorize the currently tracked
    249  * assignments if the current assignment is not suitable. Keep a pointer to
    250  * the current assignment.
    251  */
    252 ir_visitor_status
    253 ir_vectorize_visitor::visit_enter(ir_assignment *ir)
    254 {
    255    ir_dereference *lhs = this->last_assignment != NULL ?
    256                          this->last_assignment->lhs : NULL;
    257    ir_rvalue *rhs = this->last_assignment != NULL ?
    258                     this->last_assignment->rhs : NULL;
    259 
    260    if (ir->condition ||
    261        this->channels >= 4 ||
    262        !single_channel_write_mask(ir->write_mask) ||
    263        this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
    264        (lhs && !ir->lhs->equals(lhs)) ||
    265        (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
    266       try_vectorize();
    267    }
    268 
    269    this->current_assignment = ir;
    270 
    271    return visit_continue;
    272 }
    273 
    274 /**
    275  * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
    276  * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
    277  * matches the current assignment's write mask.
    278  *
    279  * If the write mask doesn't match the swizzle mask, remove the current
    280  * assignment from further consideration.
    281  */
    282 ir_visitor_status
    283 ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
    284 {
    285    if (this->current_assignment) {
    286       if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
    287          this->has_swizzle = true;
    288       } else {
    289          this->current_assignment = NULL;
    290       }
    291    }
    292    return visit_continue;
    293 }
    294 
    295 /* Upon entering an ir_array_dereference, remove the current assignment from
    296  * further consideration. Since the index of an array dereference must scalar,
    297  * we are not able to vectorize it.
    298  *
    299  * FINISHME: If all of scalar indices are identical we could vectorize.
    300  */
    301 ir_visitor_status
    302 ir_vectorize_visitor::visit_enter(ir_dereference_array *)
    303 {
    304    this->current_assignment = NULL;
    305    return visit_continue_with_parent;
    306 }
    307 
    308 /**
    309  * Upon entering an ir_expression, remove the current assignment from further
    310  * consideration if the expression operates horizontally on vectors.
    311  */
    312 ir_visitor_status
    313 ir_vectorize_visitor::visit_enter(ir_expression *ir)
    314 {
    315    if (ir->is_horizontal()) {
    316       this->current_assignment = NULL;
    317       return visit_continue_with_parent;
    318    }
    319    return visit_continue;
    320 }
    321 
    322 /* Since there is no statement to visit between the "then" and "else"
    323  * instructions try to vectorize before, in between, and after them to avoid
    324  * combining statements from different basic blocks.
    325  */
    326 ir_visitor_status
    327 ir_vectorize_visitor::visit_enter(ir_if *ir)
    328 {
    329    try_vectorize();
    330 
    331    visit_list_elements(this, &ir->then_instructions);
    332    try_vectorize();
    333 
    334    visit_list_elements(this, &ir->else_instructions);
    335    try_vectorize();
    336 
    337    return visit_continue_with_parent;
    338 }
    339 
    340 /* Since there is no statement to visit between the instructions in the body of
    341  * the loop and the instructions after it try to vectorize before and after the
    342  * body to avoid combining statements from different basic blocks.
    343  */
    344 ir_visitor_status
    345 ir_vectorize_visitor::visit_enter(ir_loop *ir)
    346 {
    347    try_vectorize();
    348 
    349    visit_list_elements(this, &ir->body_instructions);
    350    try_vectorize();
    351 
    352    return visit_continue_with_parent;
    353 }
    354 
    355 /**
    356  * Upon entering an ir_texture, remove the current assignment from
    357  * further consideration. Vectorizing multiple texture lookups into one
    358  * is wrong.
    359  */
    360 ir_visitor_status
    361 ir_vectorize_visitor::visit_enter(ir_texture *)
    362 {
    363    this->current_assignment = NULL;
    364    return visit_continue_with_parent;
    365 }
    366 
    367 /**
    368  * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
    369  * the swizzle mask(s) found were appropriate. Also save a pointer in
    370  * ::last_assignment so that we can compare future assignments with it.
    371  *
    372  * Finally, clear ::current_assignment and ::has_swizzle.
    373  */
    374 ir_visitor_status
    375 ir_vectorize_visitor::visit_leave(ir_assignment *ir)
    376 {
    377    if (this->has_swizzle && this->current_assignment) {
    378       assert(this->current_assignment == ir);
    379 
    380       unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
    381       this->assignment[channel] = ir;
    382       this->channels++;
    383 
    384       this->last_assignment = this->current_assignment;
    385    }
    386    this->current_assignment = NULL;
    387    this->has_swizzle = false;
    388    return visit_continue;
    389 }
    390 
    391 /**
    392  * Combines scalar assignments of the same expression (modulo swizzle) to
    393  * multiple channels of the same variable into a single vectorized expression
    394  * and assignment.
    395  */
    396 bool
    397 do_vectorize(exec_list *instructions)
    398 {
    399    ir_vectorize_visitor v;
    400 
    401    v.run(instructions);
    402 
    403    /* Try to vectorize the last assignments seen. */
    404    v.try_vectorize();
    405 
    406    return v.progress;
    407 }
    408