1 /* 2 * Copyright 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "brw_fs.h" 25 #include "brw_fs_cfg.h" 26 27 /** @file brw_fs_cse.cpp 28 * 29 * Support for local common subexpression elimination. 30 * 31 * See Muchnik's Advanced Compiler Design and Implementation, section 32 * 13.1 (p378). 33 */ 34 35 namespace { 36 struct aeb_entry : public exec_node { 37 /** The instruction that generates the expression value. */ 38 fs_inst *generator; 39 40 /** The temporary where the value is stored. */ 41 fs_reg tmp; 42 }; 43 } 44 45 static bool 46 is_expression(const fs_inst *const inst) 47 { 48 switch (inst->opcode) { 49 case BRW_OPCODE_SEL: 50 case BRW_OPCODE_NOT: 51 case BRW_OPCODE_AND: 52 case BRW_OPCODE_OR: 53 case BRW_OPCODE_XOR: 54 case BRW_OPCODE_SHR: 55 case BRW_OPCODE_SHL: 56 case BRW_OPCODE_RSR: 57 case BRW_OPCODE_RSL: 58 case BRW_OPCODE_ASR: 59 case BRW_OPCODE_ADD: 60 case BRW_OPCODE_MUL: 61 case BRW_OPCODE_FRC: 62 case BRW_OPCODE_RNDU: 63 case BRW_OPCODE_RNDD: 64 case BRW_OPCODE_RNDE: 65 case BRW_OPCODE_RNDZ: 66 case BRW_OPCODE_LINE: 67 case BRW_OPCODE_PLN: 68 case BRW_OPCODE_MAD: 69 case FS_OPCODE_CINTERP: 70 case FS_OPCODE_LINTERP: 71 return true; 72 default: 73 return false; 74 } 75 } 76 77 static bool 78 operands_match(fs_reg *xs, fs_reg *ys) 79 { 80 return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]); 81 } 82 83 bool 84 fs_visitor::opt_cse_local(fs_bblock *block, exec_list *aeb) 85 { 86 bool progress = false; 87 88 void *mem_ctx = ralloc_context(this->mem_ctx); 89 90 for (fs_inst *inst = block->start; 91 inst != block->end->next; 92 inst = (fs_inst *) inst->next) { 93 94 /* Skip some cases. */ 95 if (is_expression(inst) && !inst->predicated && inst->mlen == 0 && 96 !inst->force_uncompressed && !inst->force_sechalf && 97 !inst->conditional_mod) 98 { 99 bool found = false; 100 101 aeb_entry *entry; 102 foreach_list(entry_node, aeb) { 103 entry = (aeb_entry *) entry_node; 104 105 /* Match current instruction's expression against those in AEB. */ 106 if (inst->opcode == entry->generator->opcode && 107 inst->saturate == entry->generator->saturate && 108 operands_match(entry->generator->src, inst->src)) { 109 110 found = true; 111 progress = true; 112 break; 113 } 114 } 115 116 if (!found) { 117 /* Our first sighting of this expression. Create an entry. */ 118 aeb_entry *entry = ralloc(mem_ctx, aeb_entry); 119 entry->tmp = reg_undef; 120 entry->generator = inst; 121 aeb->push_tail(entry); 122 } else { 123 /* This is at least our second sighting of this expression. 124 * If we don't have a temporary already, make one. 125 */ 126 bool no_existing_temp = entry->tmp.file == BAD_FILE; 127 if (no_existing_temp) { 128 entry->tmp = fs_reg(this, glsl_type::float_type); 129 entry->tmp.type = inst->dst.type; 130 131 fs_inst *copy = new(ralloc_parent(inst)) 132 fs_inst(BRW_OPCODE_MOV, entry->generator->dst, entry->tmp); 133 entry->generator->insert_after(copy); 134 entry->generator->dst = entry->tmp; 135 } 136 137 /* dest <- temp */ 138 fs_inst *copy = new(ralloc_parent(inst)) 139 fs_inst(BRW_OPCODE_MOV, inst->dst, entry->tmp); 140 inst->replace_with(copy); 141 142 /* Appending an instruction may have changed our bblock end. */ 143 if (inst == block->end) { 144 block->end = copy; 145 } 146 147 /* Continue iteration with copy->next */ 148 inst = copy; 149 } 150 } 151 152 /* Kill all AEB entries that use the destination. */ 153 foreach_list_safe(entry_node, aeb) { 154 aeb_entry *entry = (aeb_entry *)entry_node; 155 156 for (int i = 0; i < 3; i++) { 157 if (inst->overwrites_reg(entry->generator->src[i])) { 158 entry->remove(); 159 ralloc_free(entry); 160 break; 161 } 162 } 163 } 164 } 165 166 ralloc_free(mem_ctx); 167 168 if (progress) 169 this->live_intervals_valid = false; 170 171 return progress; 172 } 173 174 bool 175 fs_visitor::opt_cse() 176 { 177 bool progress = false; 178 179 fs_cfg cfg(this); 180 181 for (int b = 0; b < cfg.num_blocks; b++) { 182 fs_bblock *block = cfg.blocks[b]; 183 exec_list aeb; 184 185 progress = opt_cse_local(block, &aeb) || progress; 186 } 187 188 return progress; 189 } 190