Home | History | Annotate | Download | only in glsl
      1 /*
 * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file lower_instructions.cpp
     26  *
     27  * Many GPUs lack native instructions for certain expression operations, and
     28  * must replace them with some other expression tree.  This pass lowers some
     29  * of the most common cases, allowing the lowering code to be implemented once
     30  * rather than in each driver backend.
     31  *
     32  * Currently supported transformations:
     33  * - SUB_TO_ADD_NEG
     34  * - DIV_TO_MUL_RCP
     35  * - INT_DIV_TO_MUL_RCP
     36  * - EXP_TO_EXP2
     37  * - POW_TO_EXP2
     38  * - LOG_TO_LOG2
     39  * - MOD_TO_FRACT
     40  *
     41  * SUB_TO_ADD_NEG:
     42  * ---------------
     43  * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
     44  *
     45  * This simplifies expression reassociation, and for many backends
     46  * there is no subtract operation separate from adding the negation.
     47  * For backends with native subtract operations, they will probably
     48  * want to recognize add(op0, neg(op1)) or the other way around to
     49  * produce a subtract anyway.
     50  *
     51  * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
     52  * --------------------------------------
     53  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
     54  *
     55  * Many GPUs don't have a divide instruction (945 and 965 included),
     56  * but they do have an RCP instruction to compute an approximate
     57  * reciprocal.  By breaking the operation down, constant reciprocals
     58  * can get constant folded.
     59  *
     60  * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
     61  * handles the integer case, converting to and from floating point so that
     62  * RCP is possible.
     63  *
     64  * EXP_TO_EXP2 and LOG_TO_LOG2:
     65  * ----------------------------
     66  * Many GPUs don't have a base e log or exponent instruction, but they
     67  * do have base 2 versions, so this pass converts exp and log to exp2
     68  * and log2 operations.
     69  *
     70  * POW_TO_EXP2:
     71  * -----------
     72  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
     73  * x**y to 2**(y * log2(x)).
     74  *
     75  * MOD_TO_FRACT:
     76  * -------------
     77  * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
     78  *
     79  * Many GPUs don't have a MOD instruction (945 and 965 included), and
     80  * if we have to break it down like this anyway, it gives an
     81  * opportunity to do things like constant fold the (1.0 / op1) easily.
     82  */
     83 
     84 #include "main/core.h" /* for M_LOG2E */
     85 #include "glsl_types.h"
     86 #include "ir.h"
     87 #include "ir_optimization.h"
     88 
     89 class lower_instructions_visitor : public ir_hierarchical_visitor {
     90 public:
     91    lower_instructions_visitor(unsigned lower)
     92       : progress(false), lower(lower) { }
     93 
     94    ir_visitor_status visit_leave(ir_expression *);
     95 
     96    bool progress;
     97 
     98 private:
     99    unsigned lower; /** Bitfield of which operations to lower */
    100 
    101    void sub_to_add_neg(ir_expression *);
    102    void div_to_mul_rcp(ir_expression *);
    103    void int_div_to_mul_rcp(ir_expression *);
    104    void mod_to_fract(ir_expression *);
    105    void exp_to_exp2(ir_expression *);
    106    void pow_to_exp2(ir_expression *);
    107    void log_to_log2(ir_expression *);
    108 };
    109 
    110 /**
    111  * Determine if a particular type of lowering should occur
    112  */
    113 #define lowering(x) (this->lower & x)
    114 
    115 bool
    116 lower_instructions(exec_list *instructions, unsigned what_to_lower)
    117 {
    118    lower_instructions_visitor v(what_to_lower);
    119 
    120    visit_list_elements(&v, instructions);
    121    return v.progress;
    122 }
    123 
    124 void
    125 lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
    126 {
    127    ir->operation = ir_binop_add;
    128    ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
    129 					   ir->operands[1], NULL);
    130    this->progress = true;
    131 }
    132 
    133 void
    134 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
    135 {
    136    assert(ir->operands[1]->type->is_float());
    137 
    138    /* New expression for the 1.0 / op1 */
    139    ir_rvalue *expr;
    140    expr = new(ir) ir_expression(ir_unop_rcp,
    141 				ir->operands[1]->type,
    142 				ir->operands[1]);
    143 
    144    /* op0 / op1 -> op0 * (1.0 / op1) */
    145    ir->operation = ir_binop_mul;
    146    ir->operands[1] = expr;
    147 
    148    this->progress = true;
    149 }
    150 
    151 void
    152 lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir)
    153 {
    154    assert(ir->operands[1]->type->is_integer());
    155 
    156    /* Be careful with integer division -- we need to do it as a
    157     * float and re-truncate, since rcp(n > 1) of an integer would
    158     * just be 0.
    159     */
    160    ir_rvalue *op0, *op1;
    161    const struct glsl_type *vec_type;
    162 
    163    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
    164 				      ir->operands[1]->type->vector_elements,
    165 				      ir->operands[1]->type->matrix_columns);
    166 
    167    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
    168       op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
    169    else
    170       op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
    171 
    172    op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
    173 
    174    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
    175 				      ir->operands[0]->type->vector_elements,
    176 				      ir->operands[0]->type->matrix_columns);
    177 
    178    if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
    179       op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
    180    else
    181       op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
    182 
    183    vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
    184 				      ir->type->vector_elements,
    185 				      ir->type->matrix_columns);
    186 
    187    op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
    188 
    189    if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) {
    190       ir->operation = ir_unop_f2i;
    191       ir->operands[0] = op0;
    192    } else {
    193       ir->operation = ir_unop_i2u;
    194       ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0);
    195    }
    196    ir->operands[1] = NULL;
    197 
    198    this->progress = true;
    199 }
    200 
    201 void
    202 lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
    203 {
    204    ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
    205 
    206    ir->operation = ir_unop_exp2;
    207    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
    208 					   ir->operands[0], log2_e);
    209    this->progress = true;
    210 }
    211 
    212 void
    213 lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
    214 {
    215    ir_expression *const log2_x =
    216       new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
    217 			    ir->operands[0]);
    218 
    219    ir->operation = ir_unop_exp2;
    220    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
    221 					   ir->operands[1], log2_x);
    222    ir->operands[1] = NULL;
    223    this->progress = true;
    224 }
    225 
    226 void
    227 lower_instructions_visitor::log_to_log2(ir_expression *ir)
    228 {
    229    ir->operation = ir_binop_mul;
    230    ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
    231 					   ir->operands[0], NULL);
    232    ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
    233    this->progress = true;
    234 }
    235 
    236 void
    237 lower_instructions_visitor::mod_to_fract(ir_expression *ir)
    238 {
    239    ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
    240 					   ir_var_temporary);
    241    this->base_ir->insert_before(temp);
    242 
    243    ir_assignment *const assign =
    244       new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
    245 			    ir->operands[1], NULL);
    246 
    247    this->base_ir->insert_before(assign);
    248 
    249    ir_expression *const div_expr =
    250       new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
    251 			    ir->operands[0],
    252 			    new(ir) ir_dereference_variable(temp));
    253 
    254    /* Don't generate new IR that would need to be lowered in an additional
    255     * pass.
    256     */
    257    if (lowering(DIV_TO_MUL_RCP))
    258       div_to_mul_rcp(div_expr);
    259 
    260    ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
    261 					   ir->operands[0]->type,
    262 					   div_expr,
    263 					   NULL);
    264 
    265    ir->operation = ir_binop_mul;
    266    ir->operands[0] = new(ir) ir_dereference_variable(temp);
    267    ir->operands[1] = expr;
    268    this->progress = true;
    269 }
    270 
    271 ir_visitor_status
    272 lower_instructions_visitor::visit_leave(ir_expression *ir)
    273 {
    274    switch (ir->operation) {
    275    case ir_binop_sub:
    276       if (lowering(SUB_TO_ADD_NEG))
    277 	 sub_to_add_neg(ir);
    278       break;
    279 
    280    case ir_binop_div:
    281       if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
    282 	 int_div_to_mul_rcp(ir);
    283       else if (ir->operands[1]->type->is_float() && lowering(DIV_TO_MUL_RCP))
    284 	 div_to_mul_rcp(ir);
    285       break;
    286 
    287    case ir_unop_exp:
    288       if (lowering(EXP_TO_EXP2))
    289 	 exp_to_exp2(ir);
    290       break;
    291 
    292    case ir_unop_log:
    293       if (lowering(LOG_TO_LOG2))
    294 	 log_to_log2(ir);
    295       break;
    296 
    297    case ir_binop_mod:
    298       if (lowering(MOD_TO_FRACT) && ir->type->is_float())
    299 	 mod_to_fract(ir);
    300       break;
    301 
    302    case ir_binop_pow:
    303       if (lowering(POW_TO_EXP2))
    304 	 pow_to_exp2(ir);
    305       break;
    306 
    307    default:
    308       return visit_continue;
    309    }
    310 
    311    return visit_continue;
    312 }
    313