1 /* 2 * Copyright 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file opt_rebalance_tree.cpp 26 * 27 * Rebalances a reduction expression tree. 28 * 29 * For reduction operations (e.g., x + y + z + w) we generate an expression 30 * tree like 31 * 32 * + 33 * / \ 34 * + w 35 * / \ 36 * + z 37 * / \ 38 * x y 39 * 40 * which we can rebalance into 41 * 42 * + 43 * / \ 44 * / \ 45 * + + 46 * / \ / \ 47 * x y z w 48 * 49 * to get a better instruction scheduling. 50 * 51 * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout 52 * and Bette L. Warren. 53 * 54 * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable 55 * explanation of the of the tree_to_vine() (rightward rotation) and 56 * vine_to_tree() (leftward rotation) algorithms. 57 */ 58 59 #include "ir.h" 60 #include "ir_visitor.h" 61 #include "ir_rvalue_visitor.h" 62 #include "ir_optimization.h" 63 #include "main/macros.h" /* for MAX2 */ 64 65 /* The DSW algorithm generates a degenerate tree (really, a linked list) in 66 * tree_to_vine(). We'd rather not leave a binary expression with only one 67 * operand, so trivial modifications (the ternary operators below) are needed 68 * to ensure that we only rotate around the ir_expression nodes of the tree. 69 */ 70 static unsigned 71 tree_to_vine(ir_expression *root) 72 { 73 unsigned size = 0; 74 ir_rvalue *vine_tail = root; 75 ir_rvalue *remainder = root->operands[1]; 76 77 while (remainder != NULL) { 78 ir_expression *remainder_temp = remainder->as_expression(); 79 ir_expression *remainder_left = remainder_temp ? 80 remainder_temp->operands[0]->as_expression() : NULL; 81 82 if (remainder_left == NULL) { 83 /* move vine_tail down one */ 84 vine_tail = remainder; 85 remainder = remainder->as_expression() ? 86 ((ir_expression *)remainder)->operands[1] : NULL; 87 size++; 88 } else { 89 /* rotate */ 90 ir_expression *tempptr = remainder_left; 91 ((ir_expression *)remainder)->operands[0] = tempptr->operands[1]; 92 tempptr->operands[1] = remainder; 93 remainder = tempptr; 94 ((ir_expression *)vine_tail)->operands[1] = tempptr; 95 } 96 } 97 98 return size; 99 } 100 101 static void 102 compression(ir_expression *root, unsigned count) 103 { 104 ir_expression *scanner = root; 105 106 for (unsigned i = 0; i < count; i++) { 107 ir_expression *child = (ir_expression *)scanner->operands[1]; 108 scanner->operands[1] = child->operands[1]; 109 scanner = (ir_expression *)scanner->operands[1]; 110 child->operands[1] = scanner->operands[0]; 111 scanner->operands[0] = child; 112 } 113 } 114 115 static void 116 vine_to_tree(ir_expression *root, unsigned size) 117 { 118 int n = size - 1; 119 for (int m = n / 2; m > 0; m = n / 2) { 120 compression(root, m); 121 n -= m + 1; 122 } 123 } 124 125 namespace { 126 127 class ir_rebalance_visitor : public ir_rvalue_enter_visitor { 128 public: 129 ir_rebalance_visitor() 130 { 131 progress = false; 132 } 133 134 virtual ir_visitor_status visit_enter(ir_assignment *ir); 135 136 void handle_rvalue(ir_rvalue **rvalue); 137 138 bool progress; 139 }; 140 141 struct is_reduction_data { 142 ir_expression_operation operation; 143 const glsl_type *type; 144 unsigned num_expr; 145 bool is_reduction; 146 bool contains_constant; 147 }; 148 149 } /* anonymous namespace */ 150 151 ir_visitor_status 152 ir_rebalance_visitor::visit_enter(ir_assignment *ir) 153 { 154 ir_variable *var = ir->lhs->variable_referenced(); 155 if (var->data.invariant || var->data.precise) { 156 /* If we're assigning to an invariant variable, just bail. Tree 157 * rebalancing (reassociation) isn't precision-safe. 158 */ 159 return visit_continue_with_parent; 160 } else { 161 return visit_continue; 162 } 163 } 164 165 static bool 166 is_reduction_operation(ir_expression_operation operation) 167 { 168 switch (operation) { 169 case ir_binop_add: 170 case ir_binop_mul: 171 case ir_binop_bit_and: 172 case ir_binop_bit_xor: 173 case ir_binop_bit_or: 174 case ir_binop_logic_and: 175 case ir_binop_logic_xor: 176 case ir_binop_logic_or: 177 case ir_binop_min: 178 case ir_binop_max: 179 return true; 180 default: 181 return false; 182 } 183 } 184 185 /* Note that this function does not attempt to recognize that reduction trees 186 * are already balanced. 187 * 188 * We return false from this function for a number of reasons other than an 189 * expression tree not being a mathematical reduction. Namely, 190 * 191 * - if the tree contains multiple constants that we may be able to combine. 192 * - if the tree contains matrices: 193 * - they might contain vec4's with many constant components that we can 194 * simplify after splitting. 195 * - applying the matrix chain ordering optimization is more than just 196 * balancing an expression tree. 197 * - if the tree contains operations on multiple types. 198 * - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c 199 * would trick the visiting pass. 200 */ 201 static void 202 is_reduction(ir_instruction *ir, void *data) 203 { 204 struct is_reduction_data *ird = (struct is_reduction_data *)data; 205 if (!ird->is_reduction) 206 return; 207 208 /* We don't want to balance a tree that contains multiple constants, since 209 * we'll be able to constant fold them if they're not in separate subtrees. 210 */ 211 if (ir->as_constant()) { 212 if (ird->contains_constant) { 213 ird->is_reduction = false; 214 } 215 ird->contains_constant = true; 216 return; 217 } 218 219 /* Array/record dereferences have subtrees that are not part of the expr 220 * tree we're balancing. Skip trees containing them. 221 */ 222 if (ir->ir_type == ir_type_dereference_array || 223 ir->ir_type == ir_type_dereference_record) { 224 ird->is_reduction = false; 225 return; 226 } 227 228 ir_expression *expr = ir->as_expression(); 229 if (!expr) 230 return; 231 232 /* Non-constant matrices might still contain constant vec4 that we can 233 * constant fold once split up. Handling matrices will need some more 234 * work. 235 */ 236 if (expr->type->is_matrix() || 237 expr->operands[0]->type->is_matrix() || 238 (expr->operands[1] && expr->operands[1]->type->is_matrix())) { 239 ird->is_reduction = false; 240 return; 241 } 242 243 if (ird->type != NULL && ird->type != expr->type) { 244 ird->is_reduction = false; 245 return; 246 } 247 ird->type = expr->type; 248 249 ird->num_expr++; 250 if (is_reduction_operation(expr->operation)) { 251 if (ird->operation != 0 && ird->operation != expr->operation) 252 ird->is_reduction = false; 253 ird->operation = expr->operation; 254 } else { 255 ird->is_reduction = false; 256 } 257 } 258 259 static ir_rvalue * 260 handle_expression(ir_expression *expr) 261 { 262 struct is_reduction_data ird; 263 ird.operation = (ir_expression_operation)0; 264 ird.type = NULL; 265 ird.num_expr = 0; 266 ird.is_reduction = true; 267 ird.contains_constant = false; 268 269 visit_tree(expr, is_reduction, (void *)&ird); 270 271 if (ird.is_reduction && ird.num_expr > 2) { 272 ir_constant z = ir_constant(0.0f); 273 ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr); 274 275 unsigned size = tree_to_vine(&pseudo_root); 276 vine_to_tree(&pseudo_root, size); 277 278 expr = (ir_expression *)pseudo_root.operands[1]; 279 } 280 return expr; 281 } 282 283 static void 284 update_types(ir_instruction *ir, void *) 285 { 286 ir_expression *expr = ir->as_expression(); 287 if (!expr) 288 return; 289 290 const glsl_type *const new_type = 291 glsl_type::get_instance(expr->type->base_type, 292 MAX2(expr->operands[0]->type->vector_elements, 293 expr->operands[1]->type->vector_elements), 294 1); 295 assert(new_type != glsl_type::error_type); 296 expr->type = new_type; 297 } 298 299 void 300 ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue) 301 { 302 if (!*rvalue) 303 return; 304 305 ir_expression *expr = (*rvalue)->as_expression(); 306 if (!expr || !is_reduction_operation(expr->operation)) 307 return; 308 309 ir_rvalue *new_rvalue = handle_expression(expr); 310 311 /* If we failed to rebalance the tree (e.g., because it wasn't a reduction, 312 * or some other set of cases) new_rvalue will point to the same root as 313 * before. 314 * 315 * Similarly, if the tree rooted at *rvalue was a reduction and was already 316 * balanced, the algorithm will rearrange the tree but will ultimately 317 * return an identical tree, so this check will handle that as well and 318 * will not set progress = true. 319 */ 320 if (new_rvalue == *rvalue) 321 return; 322 323 visit_tree(new_rvalue, NULL, NULL, update_types); 324 325 *rvalue = new_rvalue; 326 this->progress = true; 327 } 328 329 bool 330 do_rebalance_tree(exec_list *instructions) 331 { 332 ir_rebalance_visitor v; 333 334 v.run(instructions); 335 336 return v.progress; 337 } 338