1 /* 2 * Copyright 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file ir_div_to_mul_rcp.cpp 26 * 27 * Breaks an ir_unop_div expression down to op0 * (rcp(op1)). 28 * 29 * Many GPUs don't have a divide instruction (945 and 965 included), 30 * but they do have an RCP instruction to compute an approximate 31 * reciprocal. By breaking the operation down, constant reciprocals 32 * can get constant folded. 33 */ 34 35 #include "ir.h" 36 #include "glsl_types.h" 37 38 class ir_div_to_mul_rcp_visitor : public ir_hierarchical_visitor { 39 public: 40 ir_div_to_mul_rcp_visitor() 41 { 42 this->made_progress = false; 43 } 44 45 ir_visitor_status visit_leave(ir_expression *); 46 47 bool made_progress; 48 }; 49 50 bool 51 do_div_to_mul_rcp(exec_list *instructions) 52 { 53 ir_div_to_mul_rcp_visitor v; 54 55 visit_list_elements(&v, instructions); 56 return v.made_progress; 57 } 58 59 ir_visitor_status 60 ir_div_to_mul_rcp_visitor::visit_leave(ir_expression *ir) 61 { 62 if (ir->operation != ir_binop_div) 63 return visit_continue; 64 65 if (ir->operands[1]->type->base_type != GLSL_TYPE_INT && 66 ir->operands[1]->type->base_type != GLSL_TYPE_UINT) { 67 /* New expression for the 1.0 / op1 */ 68 ir_rvalue *expr; 69 expr = new(ir) ir_expression(ir_unop_rcp, 70 ir->operands[1]->type, 71 ir->operands[1], 72 NULL); 73 74 /* op0 / op1 -> op0 * (1.0 / op1) */ 75 ir->operation = ir_binop_mul; 76 ir->operands[1] = expr; 77 } else { 78 /* Be careful with integer division -- we need to do it as a 79 * float and re-truncate, since rcp(n > 1) of an integer would 80 * just be 0. 81 */ 82 ir_rvalue *op0, *op1; 83 const struct glsl_type *vec_type; 84 85 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 86 ir->operands[1]->type->vector_elements, 87 ir->operands[1]->type->matrix_columns); 88 89 if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) 90 op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL); 91 else 92 op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL); 93 94 op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL); 95 96 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 97 ir->operands[0]->type->vector_elements, 98 ir->operands[0]->type->matrix_columns); 99 100 if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) 101 op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL); 102 else 103 op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL); 104 105 op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); 106 107 ir->operation = ir_unop_f2i; 108 ir->operands[0] = op0; 109 ir->operands[1] = NULL; 110 } 111 112 this->made_progress = true; 113 114 return visit_continue; 115 } 116