Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file lower_vector.cpp
     26  * IR lowering pass to remove some types of ir_quadop_vector
     27  *
     28  * \author Ian Romanick <ian.d.romanick (at) intel.com>
     29  */
     30 
     31 #include "ir.h"
     32 #include "ir_rvalue_visitor.h"
     33 
     34 namespace {
     35 
     36 class lower_vector_visitor : public ir_rvalue_visitor {
     37 public:
     38    lower_vector_visitor() : dont_lower_swz(false), progress(false)
     39    {
     40       /* empty */
     41    }
     42 
     43    void handle_rvalue(ir_rvalue **rvalue);
     44 
     45    /**
     46     * Should SWZ-like expressions be lowered?
     47     */
     48    bool dont_lower_swz;
     49 
     50    bool progress;
     51 };
     52 
     53 } /* anonymous namespace */
     54 
     55 /**
     56  * Determine if an IR expression tree looks like an extended swizzle
     57  *
     58  * Extended swizzles consist of access of a single vector source (with possible
     59  * per component negation) and the constants -1, 0, or 1.
     60  */
     61 static bool
     62 is_extended_swizzle(ir_expression *ir)
     63 {
     64    /* Track any variables that are accessed by this expression.
     65     */
     66    ir_variable *var = NULL;
     67 
     68    assert(ir->operation == ir_quadop_vector);
     69 
     70    for (unsigned i = 0; i < ir->type->vector_elements; i++) {
     71       ir_rvalue *op = ir->operands[i];
     72 
     73       while (op != NULL) {
     74 	 switch (op->ir_type) {
     75 	 case ir_type_constant: {
     76 	    const ir_constant *const c = op->as_constant();
     77 
     78 	    if (!c->is_one() && !c->is_zero() && !c->is_negative_one())
     79 	       return false;
     80 
     81 	    op = NULL;
     82 	    break;
     83 	 }
     84 
     85 	 case ir_type_dereference_variable: {
     86 	    ir_dereference_variable *const d = (ir_dereference_variable *) op;
     87 
     88 	    if ((var != NULL) && (var != d->var))
     89 	       return false;
     90 
     91 	    var = d->var;
     92 	    op = NULL;
     93 	    break;
     94 	 }
     95 
     96 	 case ir_type_expression: {
     97 	    ir_expression *const ex = (ir_expression *) op;
     98 
     99 	    if (ex->operation != ir_unop_neg)
    100 	       return false;
    101 
    102 	    op = ex->operands[0];
    103 	    break;
    104 	 }
    105 
    106 	 case ir_type_swizzle:
    107 	    op = ((ir_swizzle *) op)->val;
    108 	    break;
    109 
    110 	 default:
    111 	    return false;
    112 	 }
    113       }
    114    }
    115 
    116    return true;
    117 }
    118 
    119 void
    120 lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
    121 {
    122    if (!*rvalue)
    123       return;
    124 
    125    ir_expression *expr = (*rvalue)->as_expression();
    126    if ((expr == NULL) || (expr->operation != ir_quadop_vector))
    127       return;
    128 
    129    if (this->dont_lower_swz && is_extended_swizzle(expr))
    130       return;
    131 
    132    /* FINISHME: Is this the right thing to use for the ralloc context?
    133     */
    134    void *const mem_ctx = expr;
    135 
    136    assert(expr->type->vector_elements == expr->num_operands);
    137 
    138    /* Generate a temporary with the same type as the ir_quadop_operation.
    139     */
    140    ir_variable *const temp =
    141       new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);
    142 
    143    this->base_ir->insert_before(temp);
    144 
    145    /* Counter of the number of components collected so far.
    146     */
    147    unsigned assigned;
    148 
    149    /* Write-mask in the destination that receives counted by 'assigned'.
    150     */
    151    unsigned write_mask;
    152 
    153 
    154    /* Generate upto four assignments to that variable.  Try to group component
    155     * assignments together:
    156     *
    157     * - All constant components can be assigned at once.
    158     * - All assigments of components from a single variable with the same
    159     *   unary operator can be assigned at once.
    160     */
    161    ir_constant_data d = { { 0 } };
    162 
    163    assigned = 0;
    164    write_mask = 0;
    165    for (unsigned i = 0; i < expr->type->vector_elements; i++) {
    166       const ir_constant *const c = expr->operands[i]->as_constant();
    167 
    168       if (c == NULL)
    169 	 continue;
    170 
    171       switch (expr->type->base_type) {
    172       case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
    173       case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
    174       case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
    175       case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
    176       default:              assert(!"Should not get here."); break;
    177       }
    178 
    179       write_mask |= (1U << i);
    180       assigned++;
    181    }
    182 
    183    assert((write_mask == 0) == (assigned == 0));
    184 
    185    /* If there were constant values, generate an assignment.
    186     */
    187    if (assigned > 0) {
    188       ir_constant *const c =
    189 	 new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
    190 							  assigned, 1),
    191 				  &d);
    192       ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
    193       ir_assignment *const assign =
    194 	 new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);
    195 
    196       this->base_ir->insert_before(assign);
    197    }
    198 
    199    /* FINISHME: This should try to coalesce assignments.
    200     */
    201    for (unsigned i = 0; i < expr->type->vector_elements; i++) {
    202       if (expr->operands[i]->ir_type == ir_type_constant)
    203 	 continue;
    204 
    205       ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
    206       ir_assignment *const assign =
    207 	 new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i));
    208 
    209       this->base_ir->insert_before(assign);
    210       assigned++;
    211    }
    212 
    213    assert(assigned == expr->type->vector_elements);
    214 
    215    *rvalue = new(mem_ctx) ir_dereference_variable(temp);
    216    this->progress = true;
    217 }
    218 
    219 bool
    220 lower_quadop_vector(exec_list *instructions, bool dont_lower_swz)
    221 {
    222    lower_vector_visitor v;
    223 
    224    v.dont_lower_swz = dont_lower_swz;
    225    visit_list_elements(&v, instructions);
    226 
    227    return v.progress;
    228 }
    229