Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file lower_instructions.cpp
     26  *
     27  * Many GPUs lack native instructions for certain expression operations, and
     28  * must replace them with some other expression tree.  This pass lowers some
     29  * of the most common cases, allowing the lowering code to be implemented once
     30  * rather than in each driver backend.
     31  *
     32  * Currently supported transformations:
     33  * - SUB_TO_ADD_NEG
     34  * - DIV_TO_MUL_RCP
     35  * - EXP_TO_EXP2
     36  * - POW_TO_EXP2
     37  * - LOG_TO_LOG2
     38  * - MOD_TO_FRACT
     39  *
     40  * SUB_TO_ADD_NEG:
     41  * ---------------
     42  * Breaks an ir_binop_sub expression down to add(op0, neg(op1))
     43  *
     44  * This simplifies expression reassociation, and for many backends
     45  * there is no subtract operation separate from adding the negation.
     46  * For backends with native subtract operations, they will probably
     47  * want to recognize add(op0, neg(op1)) or the other way around to
     48  * produce a subtract anyway.
     49  *
     50  * DIV_TO_MUL_RCP:
     51  * ---------------
     52  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
     53  *
     54  * Many GPUs don't have a divide instruction (945 and 965 included),
     55  * but they do have an RCP instruction to compute an approximate
     56  * reciprocal.  By breaking the operation down, constant reciprocals
     57  * can get constant folded.
     58  *
     59  * EXP_TO_EXP2 and LOG_TO_LOG2:
     60  * ----------------------------
     61  * Many GPUs don't have a base e log or exponent instruction, but they
     62  * do have base 2 versions, so this pass converts exp and log to exp2
     63  * and log2 operations.
     64  *
     65  * POW_TO_EXP2:
     66  * -----------
     67  * Many older GPUs don't have an x**y instruction.  For these GPUs, convert
     68  * x**y to 2**(y * log2(x)).
     69  *
     70  * MOD_TO_FRACT:
     71  * -------------
     72  * Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
     73  *
     74  * Many GPUs don't have a MOD instruction (945 and 965 included), and
     75  * if we have to break it down like this anyway, it gives an
     76  * opportunity to do things like constant fold the (1.0 / op1) easily.
     77  */
     78 
     79 #include "main/core.h" /* for M_LOG2E */
     80 #include "glsl_types.h"
     81 #include "ir.h"
     82 #include "ir_optimization.h"
     83 
     84 class lower_instructions_visitor : public ir_hierarchical_visitor {
     85 public:
     86    lower_instructions_visitor(unsigned lower)
     87       : progress(false), lower(lower) { }
     88 
     89    ir_visitor_status visit_leave(ir_expression *);
     90 
     91    bool progress;
     92 
     93 private:
     94    unsigned lower; /** Bitfield of which operations to lower */
     95 
     96    void sub_to_add_neg(ir_expression *);
     97    void div_to_mul_rcp(ir_expression *);
     98    void mod_to_fract(ir_expression *);
     99    void exp_to_exp2(ir_expression *);
    100    void pow_to_exp2(ir_expression *);
    101    void log_to_log2(ir_expression *);
    102 };
    103 
    104 /**
    105  * Determine if a particular type of lowering should occur
    106  */
    107 #define lowering(x) (this->lower & x)
    108 
    109 bool
    110 lower_instructions(exec_list *instructions, unsigned what_to_lower)
    111 {
    112    lower_instructions_visitor v(what_to_lower);
    113 
    114    visit_list_elements(&v, instructions);
    115    return v.progress;
    116 }
    117 
    118 void
    119 lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
    120 {
    121    ir->operation = ir_binop_add;
    122    ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
    123 					   ir->operands[1], NULL);
    124    this->progress = true;
    125 }
    126 
    127 void
    128 lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
    129 {
    130    if (!ir->operands[1]->type->is_integer()) {
    131       /* New expression for the 1.0 / op1 */
    132       ir_rvalue *expr;
    133       expr = new(ir) ir_expression(ir_unop_rcp,
    134 				   ir->operands[1]->type,
    135 				   ir->operands[1],
    136 				   NULL);
    137 
    138       /* op0 / op1 -> op0 * (1.0 / op1) */
    139       ir->operation = ir_binop_mul;
    140       ir->operands[1] = expr;
    141    } else {
    142       /* Be careful with integer division -- we need to do it as a
    143        * float and re-truncate, since rcp(n > 1) of an integer would
    144        * just be 0.
    145        */
    146       ir_rvalue *op0, *op1;
    147       const struct glsl_type *vec_type;
    148 
    149       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
    150 					 ir->operands[1]->type->vector_elements,
    151 					 ir->operands[1]->type->matrix_columns);
    152 
    153       if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
    154 	 op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
    155       else
    156 	 op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
    157 
    158       op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
    159 
    160       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
    161 					 ir->operands[0]->type->vector_elements,
    162 					 ir->operands[0]->type->matrix_columns);
    163 
    164       if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
    165 	 op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
    166       else
    167 	 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
    168 
    169       op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
    170 
    171       ir->operation = ir_unop_f2i;
    172       ir->operands[0] = op0;
    173       ir->operands[1] = NULL;
    174    }
    175 
    176    this->progress = true;
    177 }
    178 
    179 void
    180 lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
    181 {
    182    ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E));
    183 
    184    ir->operation = ir_unop_exp2;
    185    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
    186 					   ir->operands[0], log2_e);
    187    this->progress = true;
    188 }
    189 
    190 void
    191 lower_instructions_visitor::pow_to_exp2(ir_expression *ir)
    192 {
    193    ir_expression *const log2_x =
    194       new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
    195 			    ir->operands[0]);
    196 
    197    ir->operation = ir_unop_exp2;
    198    ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type,
    199 					   ir->operands[1], log2_x);
    200    ir->operands[1] = NULL;
    201    this->progress = true;
    202 }
    203 
    204 void
    205 lower_instructions_visitor::log_to_log2(ir_expression *ir)
    206 {
    207    ir->operation = ir_binop_mul;
    208    ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
    209 					   ir->operands[0], NULL);
    210    ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E));
    211    this->progress = true;
    212 }
    213 
    214 void
    215 lower_instructions_visitor::mod_to_fract(ir_expression *ir)
    216 {
    217    ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
    218 					   ir_var_temporary);
    219    this->base_ir->insert_before(temp);
    220 
    221    ir_assignment *const assign =
    222       new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
    223 			    ir->operands[1], NULL);
    224 
    225    this->base_ir->insert_before(assign);
    226 
    227    ir_expression *const div_expr =
    228       new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
    229 			    ir->operands[0],
    230 			    new(ir) ir_dereference_variable(temp));
    231 
    232    /* Don't generate new IR that would need to be lowered in an additional
    233     * pass.
    234     */
    235    if (lowering(DIV_TO_MUL_RCP))
    236       div_to_mul_rcp(div_expr);
    237 
    238    ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
    239 					   ir->operands[0]->type,
    240 					   div_expr,
    241 					   NULL);
    242 
    243    ir->operation = ir_binop_mul;
    244    ir->operands[0] = new(ir) ir_dereference_variable(temp);
    245    ir->operands[1] = expr;
    246    this->progress = true;
    247 }
    248 
    249 ir_visitor_status
    250 lower_instructions_visitor::visit_leave(ir_expression *ir)
    251 {
    252    switch (ir->operation) {
    253    case ir_binop_sub:
    254       if (lowering(SUB_TO_ADD_NEG))
    255 	 sub_to_add_neg(ir);
    256       break;
    257 
    258    case ir_binop_div:
    259       if (lowering(DIV_TO_MUL_RCP))
    260 	 div_to_mul_rcp(ir);
    261       break;
    262 
    263    case ir_unop_exp:
    264       if (lowering(EXP_TO_EXP2))
    265 	 exp_to_exp2(ir);
    266       break;
    267 
    268    case ir_unop_log:
    269       if (lowering(LOG_TO_LOG2))
    270 	 log_to_log2(ir);
    271       break;
    272 
    273    case ir_binop_mod:
    274       if (lowering(MOD_TO_FRACT))
    275 	 mod_to_fract(ir);
    276       break;
    277 
    278    case ir_binop_pow:
    279       if (lowering(POW_TO_EXP2))
    280 	 pow_to_exp2(ir);
    281       break;
    282 
    283    default:
    284       return visit_continue;
    285    }
    286 
    287    return visit_continue;
    288 }
    289