1 /* 2 * Copyright 2013 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include "brw_fs.h" 25 #include "brw_fs_live_variables.h" 26 #include "brw_cfg.h" 27 28 /** @file brw_fs_saturate_propagation.cpp 29 * 30 * Implements a pass that propagates the SAT modifier from a MOV.SAT into the 31 * instruction that produced the source of the MOV.SAT, thereby allowing the 32 * MOV's src and dst to be coalesced and the MOV removed. 33 * 34 * For instance, 35 * 36 * ADD tmp, src0, src1 37 * MOV.SAT dst, tmp 38 * 39 * would be transformed into 40 * 41 * ADD.SAT tmp, src0, src1 42 * MOV dst, tmp 43 */ 44 45 static bool 46 opt_saturate_propagation_local(fs_visitor *v, bblock_t *block) 47 { 48 bool progress = false; 49 int ip = block->end_ip + 1; 50 51 foreach_inst_in_block_reverse(fs_inst, inst, block) { 52 ip--; 53 54 if (inst->opcode != BRW_OPCODE_MOV || 55 !inst->saturate || 56 inst->dst.file != VGRF || 57 inst->dst.type != inst->src[0].type || 58 inst->src[0].file != VGRF || 59 inst->src[0].abs) 60 continue; 61 62 int src_var = v->live_intervals->var_from_reg(inst->src[0]); 63 int src_end_ip = v->live_intervals->end[src_var]; 64 65 bool interfered = false; 66 foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) { 67 if (regions_overlap(scan_inst->dst, scan_inst->size_written, 68 inst->src[0], inst->size_read(0))) { 69 if (scan_inst->is_partial_write() || 70 (scan_inst->dst.type != inst->dst.type && 71 !scan_inst->can_change_types())) 72 break; 73 74 if (scan_inst->saturate) { 75 inst->saturate = false; 76 progress = true; 77 } else if (src_end_ip == ip || inst->dst.equals(inst->src[0])) { 78 if (scan_inst->can_do_saturate()) { 79 if (scan_inst->dst.type != inst->dst.type) { 80 scan_inst->dst.type = inst->dst.type; 81 for (int i = 0; i < scan_inst->sources; i++) { 82 scan_inst->src[i].type = inst->dst.type; 83 } 84 } 85 86 if (inst->src[0].negate) { 87 if (scan_inst->opcode == BRW_OPCODE_MUL) { 88 scan_inst->src[0].negate = !scan_inst->src[0].negate; 89 inst->src[0].negate = false; 90 } else if (scan_inst->opcode == BRW_OPCODE_MAD) { 91 for (int i = 0; i < 2; i++) { 92 if (scan_inst->src[i].file == IMM) { 93 brw_negate_immediate(scan_inst->src[i].type, 94 &scan_inst->src[i].as_brw_reg()); 95 } else { 96 scan_inst->src[i].negate = !scan_inst->src[i].negate; 97 } 98 } 99 inst->src[0].negate = false; 100 } else if (scan_inst->opcode == BRW_OPCODE_ADD) { 101 if (scan_inst->src[1].file == IMM) { 102 if (!brw_negate_immediate(scan_inst->src[1].type, 103 &scan_inst->src[1].as_brw_reg())) { 104 break; 105 } 106 } else { 107 scan_inst->src[1].negate = !scan_inst->src[1].negate; 108 } 109 scan_inst->src[0].negate = !scan_inst->src[0].negate; 110 inst->src[0].negate = false; 111 } else { 112 break; 113 } 114 } 115 116 scan_inst->saturate = true; 117 inst->saturate = false; 118 progress = true; 119 } 120 } 121 break; 122 } 123 for (int i = 0; i < scan_inst->sources; i++) { 124 if (scan_inst->src[i].file == VGRF && 125 scan_inst->src[i].nr == inst->src[0].nr && 126 scan_inst->src[i].offset / REG_SIZE == 127 inst->src[0].offset / REG_SIZE) { 128 if (scan_inst->opcode != BRW_OPCODE_MOV || 129 !scan_inst->saturate || 130 scan_inst->src[0].abs || 131 scan_inst->src[0].negate || 132 scan_inst->src[0].abs != inst->src[0].abs || 133 scan_inst->src[0].negate != inst->src[0].negate) { 134 interfered = true; 135 break; 136 } 137 } 138 } 139 140 if (interfered) 141 break; 142 } 143 } 144 145 return progress; 146 } 147 148 bool 149 fs_visitor::opt_saturate_propagation() 150 { 151 bool progress = false; 152 153 calculate_live_intervals(); 154 155 foreach_block (block, cfg) { 156 progress = opt_saturate_propagation_local(this, block) || progress; 157 } 158 159 /* Live intervals are still valid. */ 160 161 return progress; 162 } 163