Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file ir_div_to_mul_rcp.cpp
     26  *
     27  * Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
     28  *
     29  * Many GPUs don't have a divide instruction (945 and 965 included),
     30  * but they do have an RCP instruction to compute an approximate
     31  * reciprocal.  By breaking the operation down, constant reciprocals
     32  * can get constant folded.
     33  */
     34 
     35 #include "ir.h"
     36 #include "glsl_types.h"
     37 
     38 class ir_div_to_mul_rcp_visitor : public ir_hierarchical_visitor {
     39 public:
     40    ir_div_to_mul_rcp_visitor()
     41    {
     42       this->made_progress = false;
     43    }
     44 
     45    ir_visitor_status visit_leave(ir_expression *);
     46 
     47    bool made_progress;
     48 };
     49 
     50 bool
     51 do_div_to_mul_rcp(exec_list *instructions)
     52 {
     53    ir_div_to_mul_rcp_visitor v;
     54 
     55    visit_list_elements(&v, instructions);
     56    return v.made_progress;
     57 }
     58 
     59 ir_visitor_status
     60 ir_div_to_mul_rcp_visitor::visit_leave(ir_expression *ir)
     61 {
     62    if (ir->operation != ir_binop_div)
     63       return visit_continue;
     64 
     65    if (ir->operands[1]->type->base_type != GLSL_TYPE_INT &&
     66        ir->operands[1]->type->base_type != GLSL_TYPE_UINT) {
     67       /* New expression for the 1.0 / op1 */
     68       ir_rvalue *expr;
     69       expr = new(ir) ir_expression(ir_unop_rcp,
     70 				   ir->operands[1]->type,
     71 				   ir->operands[1],
     72 				   NULL);
     73 
     74       /* op0 / op1 -> op0 * (1.0 / op1) */
     75       ir->operation = ir_binop_mul;
     76       ir->operands[1] = expr;
     77    } else {
     78       /* Be careful with integer division -- we need to do it as a
     79        * float and re-truncate, since rcp(n > 1) of an integer would
     80        * just be 0.
     81        */
     82       ir_rvalue *op0, *op1;
     83       const struct glsl_type *vec_type;
     84 
     85       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
     86 					 ir->operands[1]->type->vector_elements,
     87 					 ir->operands[1]->type->matrix_columns);
     88 
     89       if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
     90 	 op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
     91       else
     92 	 op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
     93 
     94       op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
     95 
     96       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
     97 					 ir->operands[0]->type->vector_elements,
     98 					 ir->operands[0]->type->matrix_columns);
     99 
    100       if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
    101 	 op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
    102       else
    103 	 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
    104 
    105       op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
    106 
    107       ir->operation = ir_unop_f2i;
    108       ir->operands[0] = op0;
    109       ir->operands[1] = NULL;
    110    }
    111 
    112    this->made_progress = true;
    113 
    114    return visit_continue;
    115 }
    116