1 /* 2 * Copyright 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file opt_copy_propagation_elements.cpp 26 * 27 * Replaces usage of recently-copied components of variables with the 28 * previous copy of the variable. 29 * 30 * This pass can be compared with opt_copy_propagation, which operands 31 * on arbitrary whole-variable copies. However, in order to handle 32 * the copy propagation of swizzled variables or writemasked writes, 33 * we want to track things on a channel-wise basis. I found that 34 * trying to mix the swizzled/writemasked support here with the 35 * whole-variable stuff in opt_copy_propagation.cpp just made a mess, 36 * so this is separate despite the ACP handling being somewhat 37 * similar. 38 * 39 * This should reduce the number of MOV instructions in the generated 40 * programs unless copy propagation is also done on the LIR, and may 41 * help anyway by triggering other optimizations that live in the HIR. 42 */ 43 44 #include "ir.h" 45 #include "ir_rvalue_visitor.h" 46 #include "ir_basic_block.h" 47 #include "ir_optimization.h" 48 #include "glsl_types.h" 49 50 static bool debug = false; 51 52 namespace { 53 54 class acp_entry : public exec_node 55 { 56 public: 57 acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4]) 58 { 59 this->lhs = lhs; 60 this->rhs = rhs; 61 this->write_mask = write_mask; 62 memcpy(this->swizzle, swizzle, sizeof(this->swizzle)); 63 } 64 65 acp_entry(acp_entry *a) 66 { 67 this->lhs = a->lhs; 68 this->rhs = a->rhs; 69 this->write_mask = a->write_mask; 70 memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle)); 71 } 72 73 ir_variable *lhs; 74 ir_variable *rhs; 75 unsigned int write_mask; 76 int swizzle[4]; 77 }; 78 79 80 class kill_entry : public exec_node 81 { 82 public: 83 kill_entry(ir_variable *var, int write_mask) 84 { 85 this->var = var; 86 this->write_mask = write_mask; 87 } 88 89 ir_variable *var; 90 unsigned int write_mask; 91 }; 92 93 class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor { 94 public: 95 ir_copy_propagation_elements_visitor() 96 { 97 this->progress = false; 98 this->killed_all = false; 99 this->mem_ctx = ralloc_context(NULL); 100 this->shader_mem_ctx = NULL; 101 this->acp = new(mem_ctx) exec_list; 102 this->kills = new(mem_ctx) exec_list; 103 } 104 ~ir_copy_propagation_elements_visitor() 105 { 106 ralloc_free(mem_ctx); 107 } 108 109 virtual ir_visitor_status visit_enter(class ir_loop *); 110 virtual ir_visitor_status visit_enter(class ir_function_signature *); 111 virtual ir_visitor_status visit_leave(class ir_assignment *); 112 virtual ir_visitor_status visit_enter(class ir_call *); 113 virtual ir_visitor_status visit_enter(class ir_if *); 114 virtual ir_visitor_status visit_leave(class ir_swizzle *); 115 116 void handle_rvalue(ir_rvalue **rvalue); 117 118 void add_copy(ir_assignment *ir); 119 void kill(kill_entry *k); 120 void handle_if_block(exec_list *instructions); 121 122 /** List of acp_entry: The available copies to propagate */ 123 exec_list *acp; 124 /** 125 * List of kill_entry: The variables whose values were killed in this 126 * block. 127 */ 128 exec_list *kills; 129 130 bool progress; 131 132 bool killed_all; 133 134 /* Context for our local data structures. */ 135 void *mem_ctx; 136 /* Context for allocating new shader nodes. */ 137 void *shader_mem_ctx; 138 }; 139 140 } /* unnamed namespace */ 141 142 ir_visitor_status 143 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir) 144 { 145 /* Treat entry into a function signature as a completely separate 146 * block. Any instructions at global scope will be shuffled into 147 * main() at link time, so they're irrelevant to us. 148 */ 149 exec_list *orig_acp = this->acp; 150 exec_list *orig_kills = this->kills; 151 bool orig_killed_all = this->killed_all; 152 153 this->acp = new(mem_ctx) exec_list; 154 this->kills = new(mem_ctx) exec_list; 155 this->killed_all = false; 156 157 visit_list_elements(this, &ir->body); 158 159 this->kills = orig_kills; 160 this->acp = orig_acp; 161 this->killed_all = orig_killed_all; 162 163 return visit_continue_with_parent; 164 } 165 166 ir_visitor_status 167 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir) 168 { 169 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); 170 ir_variable *var = ir->lhs->variable_referenced(); 171 172 if (var->type->is_scalar() || var->type->is_vector()) { 173 kill_entry *k; 174 175 if (lhs) 176 k = new(mem_ctx) kill_entry(var, ir->write_mask); 177 else 178 k = new(mem_ctx) kill_entry(var, ~0); 179 180 kill(k); 181 } 182 183 add_copy(ir); 184 185 return visit_continue; 186 } 187 188 ir_visitor_status 189 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *ir) 190 { 191 /* Don't visit the values of swizzles since they are handled while 192 * visiting the swizzle itself. 193 */ 194 return visit_continue; 195 } 196 197 /** 198 * Replaces dereferences of ACP RHS variables with ACP LHS variables. 199 * 200 * This is where the actual copy propagation occurs. Note that the 201 * rewriting of ir_dereference means that the ir_dereference instance 202 * must not be shared by multiple IR operations! 203 */ 204 void 205 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir) 206 { 207 int swizzle_chan[4]; 208 ir_dereference_variable *deref_var; 209 ir_variable *source[4] = {NULL, NULL, NULL, NULL}; 210 int source_chan[4]; 211 int chans; 212 213 if (!*ir) 214 return; 215 216 ir_swizzle *swizzle = (*ir)->as_swizzle(); 217 if (swizzle) { 218 deref_var = swizzle->val->as_dereference_variable(); 219 if (!deref_var) 220 return; 221 222 swizzle_chan[0] = swizzle->mask.x; 223 swizzle_chan[1] = swizzle->mask.y; 224 swizzle_chan[2] = swizzle->mask.z; 225 swizzle_chan[3] = swizzle->mask.w; 226 chans = swizzle->type->vector_elements; 227 } else { 228 deref_var = (*ir)->as_dereference_variable(); 229 if (!deref_var) 230 return; 231 232 swizzle_chan[0] = 0; 233 swizzle_chan[1] = 1; 234 swizzle_chan[2] = 2; 235 swizzle_chan[3] = 3; 236 chans = deref_var->type->vector_elements; 237 } 238 239 if (this->in_assignee) 240 return; 241 242 ir_variable *var = deref_var->var; 243 244 /* Try to find ACP entries covering swizzle_chan[], hoping they're 245 * the same source variable. 246 */ 247 foreach_iter(exec_list_iterator, iter, *this->acp) { 248 acp_entry *entry = (acp_entry *)iter.get(); 249 250 if (var == entry->lhs) { 251 for (int c = 0; c < chans; c++) { 252 if (entry->write_mask & (1 << swizzle_chan[c])) { 253 source[c] = entry->rhs; 254 source_chan[c] = entry->swizzle[swizzle_chan[c]]; 255 } 256 } 257 } 258 } 259 260 /* Make sure all channels are copying from the same source variable. */ 261 if (!source[0]) 262 return; 263 for (int c = 1; c < chans; c++) { 264 if (source[c] != source[0]) 265 return; 266 } 267 268 if (!shader_mem_ctx) 269 shader_mem_ctx = ralloc_parent(deref_var); 270 271 if (debug) { 272 printf("Copy propagation from:\n"); 273 (*ir)->print(); 274 } 275 276 deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]); 277 *ir = new(shader_mem_ctx) ir_swizzle(deref_var, 278 source_chan[0], 279 source_chan[1], 280 source_chan[2], 281 source_chan[3], 282 chans); 283 284 if (debug) { 285 printf("to:\n"); 286 (*ir)->print(); 287 printf("\n"); 288 } 289 } 290 291 292 ir_visitor_status 293 ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir) 294 { 295 /* Do copy propagation on call parameters, but skip any out params */ 296 exec_list_iterator sig_param_iter = ir->callee->parameters.iterator(); 297 foreach_iter(exec_list_iterator, iter, ir->actual_parameters) { 298 ir_variable *sig_param = (ir_variable *)sig_param_iter.get(); 299 ir_instruction *ir = (ir_instruction *)iter.get(); 300 if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) { 301 ir->accept(this); 302 } 303 sig_param_iter.next(); 304 } 305 306 /* Since we're unlinked, we don't (necessarily) know the side effects of 307 * this call. So kill all copies. 308 */ 309 acp->make_empty(); 310 this->killed_all = true; 311 312 return visit_continue_with_parent; 313 } 314 315 void 316 ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions) 317 { 318 exec_list *orig_acp = this->acp; 319 exec_list *orig_kills = this->kills; 320 bool orig_killed_all = this->killed_all; 321 322 this->acp = new(mem_ctx) exec_list; 323 this->kills = new(mem_ctx) exec_list; 324 this->killed_all = false; 325 326 /* Populate the initial acp with a copy of the original */ 327 foreach_iter(exec_list_iterator, iter, *orig_acp) { 328 acp_entry *a = (acp_entry *)iter.get(); 329 this->acp->push_tail(new(this->mem_ctx) acp_entry(a)); 330 } 331 332 visit_list_elements(this, instructions); 333 334 if (this->killed_all) { 335 orig_acp->make_empty(); 336 } 337 338 exec_list *new_kills = this->kills; 339 this->kills = orig_kills; 340 this->acp = orig_acp; 341 this->killed_all = this->killed_all || orig_killed_all; 342 343 /* Move the new kills into the parent block's list, removing them 344 * from the parent's ACP list in the process. 345 */ 346 foreach_list_safe(node, new_kills) { 347 kill_entry *k = (kill_entry *)node; 348 kill(k); 349 } 350 } 351 352 ir_visitor_status 353 ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir) 354 { 355 ir->condition->accept(this); 356 357 handle_if_block(&ir->then_instructions); 358 handle_if_block(&ir->else_instructions); 359 360 /* handle_if_block() already descended into the children. */ 361 return visit_continue_with_parent; 362 } 363 364 ir_visitor_status 365 ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir) 366 { 367 exec_list *orig_acp = this->acp; 368 exec_list *orig_kills = this->kills; 369 bool orig_killed_all = this->killed_all; 370 371 /* FINISHME: For now, the initial acp for loops is totally empty. 372 * We could go through once, then go through again with the acp 373 * cloned minus the killed entries after the first run through. 374 */ 375 this->acp = new(mem_ctx) exec_list; 376 this->kills = new(mem_ctx) exec_list; 377 this->killed_all = false; 378 379 visit_list_elements(this, &ir->body_instructions); 380 381 if (this->killed_all) { 382 orig_acp->make_empty(); 383 } 384 385 exec_list *new_kills = this->kills; 386 this->kills = orig_kills; 387 this->acp = orig_acp; 388 this->killed_all = this->killed_all || orig_killed_all; 389 390 foreach_list_safe(node, new_kills) { 391 kill_entry *k = (kill_entry *)node; 392 kill(k); 393 } 394 395 /* already descended into the children. */ 396 return visit_continue_with_parent; 397 } 398 399 /* Remove any entries currently in the ACP for this kill. */ 400 void 401 ir_copy_propagation_elements_visitor::kill(kill_entry *k) 402 { 403 foreach_list_safe(node, acp) { 404 acp_entry *entry = (acp_entry *)node; 405 406 if (entry->lhs == k->var) { 407 entry->write_mask = entry->write_mask & ~k->write_mask; 408 if (entry->write_mask == 0) { 409 entry->remove(); 410 continue; 411 } 412 } 413 if (entry->rhs == k->var) { 414 entry->remove(); 415 } 416 } 417 418 /* If we were on a list, remove ourselves before inserting */ 419 if (k->next) 420 k->remove(); 421 422 this->kills->push_tail(k); 423 } 424 425 /** 426 * Adds directly-copied channels between vector variables to the available 427 * copy propagation list. 428 */ 429 void 430 ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir) 431 { 432 acp_entry *entry; 433 int orig_swizzle[4] = {0, 1, 2, 3}; 434 int swizzle[4]; 435 436 if (ir->condition) 437 return; 438 439 ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); 440 if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector())) 441 return; 442 443 ir_dereference_variable *rhs = ir->rhs->as_dereference_variable(); 444 if (!rhs) { 445 ir_swizzle *swiz = ir->rhs->as_swizzle(); 446 if (!swiz) 447 return; 448 449 rhs = swiz->val->as_dereference_variable(); 450 if (!rhs) 451 return; 452 453 orig_swizzle[0] = swiz->mask.x; 454 orig_swizzle[1] = swiz->mask.y; 455 orig_swizzle[2] = swiz->mask.z; 456 orig_swizzle[3] = swiz->mask.w; 457 } 458 459 /* Move the swizzle channels out to the positions they match in the 460 * destination. We don't want to have to rewrite the swizzle[] 461 * array every time we clear a bit of the write_mask. 462 */ 463 int j = 0; 464 for (int i = 0; i < 4; i++) { 465 if (ir->write_mask & (1 << i)) 466 swizzle[i] = orig_swizzle[j++]; 467 } 468 469 int write_mask = ir->write_mask; 470 if (lhs->var == rhs->var) { 471 /* If this is a copy from the variable to itself, then we need 472 * to be sure not to include the updated channels from this 473 * instruction in the set of new source channels to be 474 * copy-propagated from. 475 */ 476 for (int i = 0; i < 4; i++) { 477 if (ir->write_mask & (1 << orig_swizzle[i])) 478 write_mask &= ~(1 << i); 479 } 480 } 481 482 entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask, 483 swizzle); 484 this->acp->push_tail(entry); 485 } 486 487 bool 488 do_copy_propagation_elements(exec_list *instructions) 489 { 490 ir_copy_propagation_elements_visitor v; 491 492 visit_list_elements(&v, instructions); 493 494 return v.progress; 495 } 496