1 /* 2 * Copyright 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file lower_vector.cpp 26 * IR lowering pass to remove some types of ir_quadop_vector 27 * 28 * \author Ian Romanick <ian.d.romanick (at) intel.com> 29 */ 30 31 #include "ir.h" 32 #include "ir_rvalue_visitor.h" 33 34 namespace { 35 36 class lower_vector_visitor : public ir_rvalue_visitor { 37 public: 38 lower_vector_visitor() : dont_lower_swz(false), progress(false) 39 { 40 /* empty */ 41 } 42 43 void handle_rvalue(ir_rvalue **rvalue); 44 45 /** 46 * Should SWZ-like expressions be lowered? 47 */ 48 bool dont_lower_swz; 49 50 bool progress; 51 }; 52 53 } /* anonymous namespace */ 54 55 /** 56 * Determine if an IR expression tree looks like an extended swizzle 57 * 58 * Extended swizzles consist of access of a single vector source (with possible 59 * per component negation) and the constants -1, 0, or 1. 60 */ 61 static bool 62 is_extended_swizzle(ir_expression *ir) 63 { 64 /* Track any variables that are accessed by this expression. 65 */ 66 ir_variable *var = NULL; 67 68 assert(ir->operation == ir_quadop_vector); 69 70 for (unsigned i = 0; i < ir->type->vector_elements; i++) { 71 ir_rvalue *op = ir->operands[i]; 72 73 while (op != NULL) { 74 switch (op->ir_type) { 75 case ir_type_constant: { 76 const ir_constant *const c = op->as_constant(); 77 78 if (!c->is_one() && !c->is_zero() && !c->is_negative_one()) 79 return false; 80 81 op = NULL; 82 break; 83 } 84 85 case ir_type_dereference_variable: { 86 ir_dereference_variable *const d = (ir_dereference_variable *) op; 87 88 if ((var != NULL) && (var != d->var)) 89 return false; 90 91 var = d->var; 92 op = NULL; 93 break; 94 } 95 96 case ir_type_expression: { 97 ir_expression *const ex = (ir_expression *) op; 98 99 if (ex->operation != ir_unop_neg) 100 return false; 101 102 op = ex->operands[0]; 103 break; 104 } 105 106 case ir_type_swizzle: 107 op = ((ir_swizzle *) op)->val; 108 break; 109 110 default: 111 return false; 112 } 113 } 114 } 115 116 return true; 117 } 118 119 void 120 lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue) 121 { 122 if (!*rvalue) 123 return; 124 125 ir_expression *expr = (*rvalue)->as_expression(); 126 if ((expr == NULL) || (expr->operation != ir_quadop_vector)) 127 return; 128 129 if (this->dont_lower_swz && is_extended_swizzle(expr)) 130 return; 131 132 /* FINISHME: Is this the right thing to use for the ralloc context? 133 */ 134 void *const mem_ctx = expr; 135 136 assert(expr->type->vector_elements == expr->num_operands); 137 138 /* Generate a temporary with the same type as the ir_quadop_operation. 139 */ 140 ir_variable *const temp = 141 new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary); 142 143 this->base_ir->insert_before(temp); 144 145 /* Counter of the number of components collected so far. 146 */ 147 unsigned assigned; 148 149 /* Write-mask in the destination that receives counted by 'assigned'. 150 */ 151 unsigned write_mask; 152 153 154 /* Generate upto four assignments to that variable. Try to group component 155 * assignments together: 156 * 157 * - All constant components can be assigned at once. 158 * - All assigments of components from a single variable with the same 159 * unary operator can be assigned at once. 160 */ 161 ir_constant_data d = { { 0 } }; 162 163 assigned = 0; 164 write_mask = 0; 165 for (unsigned i = 0; i < expr->type->vector_elements; i++) { 166 const ir_constant *const c = expr->operands[i]->as_constant(); 167 168 if (c == NULL) 169 continue; 170 171 switch (expr->type->base_type) { 172 case GLSL_TYPE_UINT: d.u[assigned] = c->value.u[0]; break; 173 case GLSL_TYPE_INT: d.i[assigned] = c->value.i[0]; break; 174 case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break; 175 case GLSL_TYPE_BOOL: d.b[assigned] = c->value.b[0]; break; 176 default: assert(!"Should not get here."); break; 177 } 178 179 write_mask |= (1U << i); 180 assigned++; 181 } 182 183 assert((write_mask == 0) == (assigned == 0)); 184 185 /* If there were constant values, generate an assignment. 186 */ 187 if (assigned > 0) { 188 ir_constant *const c = 189 new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type, 190 assigned, 1), 191 &d); 192 ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); 193 ir_assignment *const assign = 194 new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask); 195 196 this->base_ir->insert_before(assign); 197 } 198 199 /* FINISHME: This should try to coalesce assignments. 200 */ 201 for (unsigned i = 0; i < expr->type->vector_elements; i++) { 202 if (expr->operands[i]->ir_type == ir_type_constant) 203 continue; 204 205 ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); 206 ir_assignment *const assign = 207 new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i)); 208 209 this->base_ir->insert_before(assign); 210 assigned++; 211 } 212 213 assert(assigned == expr->type->vector_elements); 214 215 *rvalue = new(mem_ctx) ir_dereference_variable(temp); 216 this->progress = true; 217 } 218 219 bool 220 lower_quadop_vector(exec_list *instructions, bool dont_lower_swz) 221 { 222 lower_vector_visitor v; 223 224 v.dont_lower_swz = dont_lower_swz; 225 visit_list_elements(&v, instructions); 226 227 return v.progress; 228 } 229