1 /* 2 * Copyright 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file opt_tree_grafting.cpp 26 * 27 * Takes assignments to variables that are dereferenced only once and 28 * pastes the RHS expression into where the variable is dereferenced. 29 * 30 * In the process of various operations like function inlining and 31 * tertiary op handling, we'll end up with our expression trees having 32 * been chopped up into a series of assignments of short expressions 33 * to temps. Other passes like ir_algebraic.cpp would prefer to see 34 * the deepest expression trees they can to try to optimize them. 35 * 36 * This is a lot like copy propagaton. In comparison, copy 37 * propagation only acts on plain copies, not arbitrary expressions on 38 * the RHS. Generally, we wouldn't want to go pasting some 39 * complicated expression everywhere it got used, though, so we don't 40 * handle expressions in that pass. 41 * 42 * The hard part is making sure we don't move an expression across 43 * some other assignments that would change the value of the 44 * expression. So we split this into two passes: First, find the 45 * variables in our scope which are written to once and read once, and 46 * then go through basic blocks seeing if we find an opportunity to 47 * move those expressions safely. 48 */ 49 50 #include "ir.h" 51 #include "ir_visitor.h" 52 #include "ir_variable_refcount.h" 53 #include "ir_basic_block.h" 54 #include "ir_optimization.h" 55 #include "compiler/glsl_types.h" 56 57 namespace { 58 59 static bool debug = false; 60 61 class ir_tree_grafting_visitor : public ir_hierarchical_visitor { 62 public: 63 ir_tree_grafting_visitor(ir_assignment *graft_assign, 64 ir_variable *graft_var) 65 { 66 this->progress = false; 67 this->graft_assign = graft_assign; 68 this->graft_var = graft_var; 69 } 70 71 virtual ir_visitor_status visit_leave(class ir_assignment *); 72 virtual ir_visitor_status visit_enter(class ir_call *); 73 virtual ir_visitor_status visit_enter(class ir_expression *); 74 virtual ir_visitor_status visit_enter(class ir_function *); 75 virtual ir_visitor_status visit_enter(class ir_function_signature *); 76 virtual ir_visitor_status visit_enter(class ir_if *); 77 virtual ir_visitor_status visit_enter(class ir_loop *); 78 virtual ir_visitor_status visit_enter(class ir_swizzle *); 79 virtual ir_visitor_status visit_enter(class ir_texture *); 80 81 ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var); 82 83 bool do_graft(ir_rvalue **rvalue); 84 85 bool progress; 86 ir_variable *graft_var; 87 ir_assignment *graft_assign; 88 }; 89 90 struct find_deref_info { 91 ir_variable *var; 92 bool found; 93 }; 94 95 void 96 dereferences_variable_callback(ir_instruction *ir, void *data) 97 { 98 struct find_deref_info *info = (struct find_deref_info *)data; 99 ir_dereference_variable *deref = ir->as_dereference_variable(); 100 101 if (deref && deref->var == info->var) 102 info->found = true; 103 } 104 105 static bool 106 dereferences_variable(ir_instruction *ir, ir_variable *var) 107 { 108 struct find_deref_info info; 109 110 info.var = var; 111 info.found = false; 112 113 visit_tree(ir, dereferences_variable_callback, &info); 114 115 return info.found; 116 } 117 118 bool 119 ir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue) 120 { 121 if (!*rvalue) 122 return false; 123 124 ir_dereference_variable *deref = (*rvalue)->as_dereference_variable(); 125 126 if (!deref || deref->var != this->graft_var) 127 return false; 128 129 if (debug) { 130 fprintf(stderr, "GRAFTING:\n"); 131 this->graft_assign->fprint(stderr); 132 fprintf(stderr, "\n"); 133 fprintf(stderr, "TO:\n"); 134 (*rvalue)->fprint(stderr); 135 fprintf(stderr, "\n"); 136 } 137 138 this->graft_assign->remove(); 139 *rvalue = this->graft_assign->rhs; 140 141 this->progress = true; 142 return true; 143 } 144 145 ir_visitor_status 146 ir_tree_grafting_visitor::visit_enter(ir_loop *ir) 147 { 148 (void)ir; 149 /* Do not traverse into the body of the loop since that is a 150 * different basic block. 151 */ 152 return visit_stop; 153 } 154 155 /** 156 * Check if we can continue grafting after writing to a variable. If the 157 * expression we're trying to graft references the variable, we must stop. 158 * 159 * \param ir An instruction that writes to a variable. 160 * \param var The variable being updated. 161 */ 162 ir_visitor_status 163 ir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var) 164 { 165 if (dereferences_variable(this->graft_assign->rhs, var)) { 166 if (debug) { 167 fprintf(stderr, "graft killed by: "); 168 ir->fprint(stderr); 169 fprintf(stderr, "\n"); 170 } 171 return visit_stop; 172 } 173 174 return visit_continue; 175 } 176 177 ir_visitor_status 178 ir_tree_grafting_visitor::visit_leave(ir_assignment *ir) 179 { 180 if (do_graft(&ir->rhs) || 181 do_graft(&ir->condition)) 182 return visit_stop; 183 184 /* If this assignment updates a variable used in the assignment 185 * we're trying to graft, then we're done. 186 */ 187 return check_graft(ir, ir->lhs->variable_referenced()); 188 } 189 190 ir_visitor_status 191 ir_tree_grafting_visitor::visit_enter(ir_function *ir) 192 { 193 (void) ir; 194 return visit_continue_with_parent; 195 } 196 197 ir_visitor_status 198 ir_tree_grafting_visitor::visit_enter(ir_function_signature *ir) 199 { 200 (void)ir; 201 return visit_continue_with_parent; 202 } 203 204 ir_visitor_status 205 ir_tree_grafting_visitor::visit_enter(ir_call *ir) 206 { 207 foreach_two_lists(formal_node, &ir->callee->parameters, 208 actual_node, &ir->actual_parameters) { 209 ir_variable *sig_param = (ir_variable *) formal_node; 210 ir_rvalue *ir = (ir_rvalue *) actual_node; 211 ir_rvalue *new_ir = ir; 212 213 if (sig_param->data.mode != ir_var_function_in 214 && sig_param->data.mode != ir_var_const_in) { 215 if (check_graft(ir, sig_param) == visit_stop) 216 return visit_stop; 217 continue; 218 } 219 220 if (do_graft(&new_ir)) { 221 ir->replace_with(new_ir); 222 return visit_stop; 223 } 224 } 225 226 if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop) 227 return visit_stop; 228 229 return visit_continue; 230 } 231 232 ir_visitor_status 233 ir_tree_grafting_visitor::visit_enter(ir_expression *ir) 234 { 235 for (unsigned int i = 0; i < ir->get_num_operands(); i++) { 236 if (do_graft(&ir->operands[i])) 237 return visit_stop; 238 } 239 240 return visit_continue; 241 } 242 243 ir_visitor_status 244 ir_tree_grafting_visitor::visit_enter(ir_if *ir) 245 { 246 if (do_graft(&ir->condition)) 247 return visit_stop; 248 249 /* Do not traverse into the body of the if-statement since that is a 250 * different basic block. 251 */ 252 return visit_continue_with_parent; 253 } 254 255 ir_visitor_status 256 ir_tree_grafting_visitor::visit_enter(ir_swizzle *ir) 257 { 258 if (do_graft(&ir->val)) 259 return visit_stop; 260 261 return visit_continue; 262 } 263 264 ir_visitor_status 265 ir_tree_grafting_visitor::visit_enter(ir_texture *ir) 266 { 267 if (do_graft(&ir->coordinate) || 268 do_graft(&ir->projector) || 269 do_graft(&ir->offset) || 270 do_graft(&ir->shadow_comparator)) 271 return visit_stop; 272 273 switch (ir->op) { 274 case ir_tex: 275 case ir_lod: 276 case ir_query_levels: 277 case ir_texture_samples: 278 case ir_samples_identical: 279 break; 280 case ir_txb: 281 if (do_graft(&ir->lod_info.bias)) 282 return visit_stop; 283 break; 284 case ir_txf: 285 case ir_txl: 286 case ir_txs: 287 if (do_graft(&ir->lod_info.lod)) 288 return visit_stop; 289 break; 290 case ir_txf_ms: 291 if (do_graft(&ir->lod_info.sample_index)) 292 return visit_stop; 293 break; 294 case ir_txd: 295 if (do_graft(&ir->lod_info.grad.dPdx) || 296 do_graft(&ir->lod_info.grad.dPdy)) 297 return visit_stop; 298 break; 299 case ir_tg4: 300 if (do_graft(&ir->lod_info.component)) 301 return visit_stop; 302 break; 303 } 304 305 return visit_continue; 306 } 307 308 struct tree_grafting_info { 309 ir_variable_refcount_visitor *refs; 310 bool progress; 311 }; 312 313 static bool 314 try_tree_grafting(ir_assignment *start, 315 ir_variable *lhs_var, 316 ir_instruction *bb_last) 317 { 318 ir_tree_grafting_visitor v(start, lhs_var); 319 320 if (debug) { 321 fprintf(stderr, "trying to graft: "); 322 lhs_var->fprint(stderr); 323 fprintf(stderr, "\n"); 324 } 325 326 for (ir_instruction *ir = (ir_instruction *)start->next; 327 ir != bb_last->next; 328 ir = (ir_instruction *)ir->next) { 329 330 if (debug) { 331 fprintf(stderr, "- "); 332 ir->fprint(stderr); 333 fprintf(stderr, "\n"); 334 } 335 336 ir_visitor_status s = ir->accept(&v); 337 if (s == visit_stop) 338 return v.progress; 339 } 340 341 return false; 342 } 343 344 static void 345 tree_grafting_basic_block(ir_instruction *bb_first, 346 ir_instruction *bb_last, 347 void *data) 348 { 349 struct tree_grafting_info *info = (struct tree_grafting_info *)data; 350 ir_instruction *ir, *next; 351 352 for (ir = bb_first, next = (ir_instruction *)ir->next; 353 ir != bb_last->next; 354 ir = next, next = (ir_instruction *)ir->next) { 355 ir_assignment *assign = ir->as_assignment(); 356 357 if (!assign) 358 continue; 359 360 ir_variable *lhs_var = assign->whole_variable_written(); 361 if (!lhs_var) 362 continue; 363 364 if (lhs_var->data.mode == ir_var_function_out || 365 lhs_var->data.mode == ir_var_function_inout || 366 lhs_var->data.mode == ir_var_shader_out || 367 lhs_var->data.mode == ir_var_shader_storage || 368 lhs_var->data.mode == ir_var_shader_shared) 369 continue; 370 371 if (lhs_var->data.precise) 372 continue; 373 374 ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var); 375 376 if (!entry->declaration || 377 entry->assigned_count != 1 || 378 entry->referenced_count != 2) 379 continue; 380 381 /* Found a possibly graftable assignment. Now, walk through the 382 * rest of the BB seeing if the deref is here, and if nothing interfered with 383 * pasting its expression's values in between. 384 */ 385 info->progress |= try_tree_grafting(assign, lhs_var, bb_last); 386 } 387 } 388 389 } /* unnamed namespace */ 390 391 /** 392 * Does a copy propagation pass on the code present in the instruction stream. 393 */ 394 bool 395 do_tree_grafting(exec_list *instructions) 396 { 397 ir_variable_refcount_visitor refs; 398 struct tree_grafting_info info; 399 400 info.progress = false; 401 info.refs = &refs; 402 403 visit_list_elements(info.refs, instructions); 404 405 call_for_basic_blocks(instructions, tree_grafting_basic_block, &info); 406 407 return info.progress; 408 } 409