1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #define PPH_DEBUG 0 28 29 #if PPH_DEBUG 30 #define PPH_DUMP(q) do { q } while (0) 31 #else 32 #define PPH_DUMP(q) 33 #endif 34 35 #include "sb_shader.h" 36 #include "sb_pass.h" 37 38 namespace r600_sb { 39 40 int peephole::run() { 41 42 run_on(sh.root); 43 44 return 0; 45 } 46 47 void peephole::run_on(container_node* c) { 48 49 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { 50 node *n = *I; 51 52 if (n->is_container()) 53 run_on(static_cast<container_node*>(n)); 54 else { 55 56 if (n->is_alu_inst()) { 57 alu_node *a = static_cast<alu_node*>(n); 58 59 if (a->bc.op_ptr->flags & 60 (AF_PRED | AF_SET | AF_CMOV | AF_KILL)) { 61 optimize_cc_op(a); 62 } else if (a->bc.op == ALU_OP1_FLT_TO_INT) { 63 64 alu_node *s = a; 65 if (get_bool_flt_to_int_source(s)) { 66 convert_float_setcc(a, s); 67 } 68 } 69 } 70 } 71 } 72 } 73 74 void peephole::optimize_cc_op(alu_node* a) { 75 unsigned aflags = a->bc.op_ptr->flags; 76 77 if (aflags & (AF_PRED | AF_SET | AF_KILL)) { 78 optimize_cc_op2(a); 79 } else if (aflags & AF_CMOV) { 80 optimize_CNDcc_op(a); 81 } 82 } 83 84 void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) { 85 alu_node *ns = sh.clone(s); 86 87 ns->dst[0] = f2i->dst[0]; 88 ns->dst[0]->def = ns; 89 ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE)); 90 f2i->insert_after(ns); 91 f2i->remove(); 92 } 93 94 void peephole::optimize_cc_op2(alu_node* a) { 95 96 unsigned flags = a->bc.op_ptr->flags; 97 unsigned cc = flags & AF_CC_MASK; 98 99 if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred) 100 return; 101 102 unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 103 unsigned dst_type = flags & AF_DST_TYPE_MASK; 104 105 int op_kind = (flags & AF_PRED) ? 1 : 106 (flags & AF_SET) ? 2 : 107 (flags & AF_KILL) ? 3 : 0; 108 109 bool swapped = false; 110 111 if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) { 112 std::swap(a->src[0],a->src[1]); 113 swapped = true; 114 // clear modifiers 115 memset(&a->bc.src[0], 0, sizeof(bc_alu_src)); 116 memset(&a->bc.src[1], 0, sizeof(bc_alu_src)); 117 } 118 119 if (swapped || (a->src[1]->is_const() && 120 a->src[1]->literal_value == literal(0))) { 121 122 value *s = a->src[0]; 123 124 bool_op_info bop = {}; 125 126 PPH_DUMP( 127 sblog << "cc_op2: "; 128 dump::dump_op(a); 129 sblog << "\n"; 130 ); 131 132 if (!get_bool_op_info(s, bop)) 133 return; 134 135 if (cc == AF_CC_E) 136 bop.invert = !bop.invert; 137 138 bool swap_args = false; 139 140 cc = bop.n->bc.op_ptr->flags & AF_CC_MASK; 141 142 if (bop.invert) 143 cc = invert_setcc_condition(cc, swap_args); 144 145 if (bop.int_cvt) { 146 assert(cmp_type != AF_FLOAT_CMP); 147 cmp_type = AF_FLOAT_CMP; 148 } 149 150 PPH_DUMP( 151 sblog << "boi node: "; 152 dump::dump_op(bop.n); 153 sblog << " invert: " << bop.invert << " int_cvt: " << bop.int_cvt; 154 sblog <<"\n"; 155 ); 156 157 unsigned newop; 158 159 switch(op_kind) { 160 case 1: 161 newop = get_predsetcc_op(cc, cmp_type); 162 break; 163 case 2: 164 newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST); 165 break; 166 case 3: 167 newop = get_killcc_op(cc, cmp_type); 168 break; 169 default: 170 newop = ALU_OP0_NOP; 171 assert(!"invalid op kind"); 172 break; 173 } 174 175 a->bc.set_op(newop); 176 177 if (swap_args) { 178 a->src[0] = bop.n->src[1]; 179 a->src[1] = bop.n->src[0]; 180 a->bc.src[0] = bop.n->bc.src[1]; 181 a->bc.src[1] = bop.n->bc.src[0]; 182 183 } else { 184 a->src[0] = bop.n->src[0]; 185 a->src[1] = bop.n->src[1]; 186 a->bc.src[0] = bop.n->bc.src[0]; 187 a->bc.src[1] = bop.n->bc.src[1]; 188 } 189 } 190 } 191 192 void peephole::optimize_CNDcc_op(alu_node* a) { 193 unsigned flags = a->bc.op_ptr->flags; 194 unsigned cc = flags & AF_CC_MASK; 195 unsigned cmp_type = flags & AF_CMP_TYPE_MASK; 196 bool swap = false; 197 198 if (cc == AF_CC_E) { 199 swap = !swap; 200 cc = AF_CC_NE; 201 } else if (cc != AF_CC_NE) 202 return; 203 204 value *s = a->src[0]; 205 206 bool_op_info bop = {}; 207 208 PPH_DUMP( 209 sblog << "cndcc: "; 210 dump::dump_op(a); 211 sblog << "\n"; 212 ); 213 214 if (!get_bool_op_info(s, bop)) 215 return; 216 217 alu_node *d = bop.n; 218 219 if (d->bc.omod) 220 return; 221 222 PPH_DUMP( 223 sblog << "cndcc def: "; 224 dump::dump_op(d); 225 sblog << "\n"; 226 ); 227 228 229 unsigned dflags = d->bc.op_ptr->flags; 230 unsigned dcc = dflags & AF_CC_MASK; 231 unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK; 232 unsigned ddst_type = dflags & AF_DST_TYPE_MASK; 233 int nds; 234 235 // TODO we can handle some of these cases, 236 // though probably this shouldn't happen 237 if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST) 238 return; 239 240 if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0)) 241 nds = 1; 242 else if ((d->src[1]->is_const() && 243 d->src[1]->literal_value == literal(0))) 244 nds = 0; 245 else 246 return; 247 248 // can't propagate ABS modifier to CNDcc because it's OP3 249 if (d->bc.src[nds].abs) 250 return; 251 252 // TODO we can handle some cases for uint comparison 253 if (dcmp_type == AF_UINT_CMP) 254 return; 255 256 if (dcc == AF_CC_NE) { 257 dcc = AF_CC_E; 258 swap = !swap; 259 } 260 261 if (nds == 1) { 262 switch (dcc) { 263 case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break; 264 case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break; 265 default: break; 266 } 267 } 268 269 a->src[0] = d->src[nds]; 270 a->bc.src[0] = d->bc.src[nds]; 271 272 if (swap) { 273 std::swap(a->src[1], a->src[2]); 274 std::swap(a->bc.src[1], a->bc.src[2]); 275 } 276 277 a->bc.set_op(get_cndcc_op(dcc, dcmp_type)); 278 279 } 280 281 bool peephole::get_bool_flt_to_int_source(alu_node* &a) { 282 283 if (a->bc.op == ALU_OP1_FLT_TO_INT) { 284 285 if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel) 286 return false; 287 288 value *s = a->src[0]; 289 if (!s || !s->def || !s->def->is_alu_inst()) 290 return false; 291 292 alu_node *dn = static_cast<alu_node*>(s->def); 293 294 if (dn->is_alu_op(ALU_OP1_TRUNC)) { 295 s = dn->src[0]; 296 if (!s || !s->def || !s->def->is_alu_inst()) 297 return false; 298 299 if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 || 300 dn->bc.src[0].rel != 0) { 301 return false; 302 } 303 304 dn = static_cast<alu_node*>(s->def); 305 306 } 307 308 if (dn->bc.op_ptr->flags & AF_SET) { 309 a = dn; 310 return true; 311 } 312 } 313 return false; 314 } 315 316 bool peephole::get_bool_op_info(value* b, bool_op_info& bop) { 317 318 node *d = b->def; 319 320 if (!d || !d->is_alu_inst()) 321 return false; 322 323 alu_node *dn = static_cast<alu_node*>(d); 324 325 if (dn->bc.op_ptr->flags & AF_SET) { 326 bop.n = dn; 327 328 if (dn->bc.op_ptr->flags & AF_DX10) 329 bop.int_cvt = true; 330 331 return true; 332 } 333 334 if (get_bool_flt_to_int_source(dn)) { 335 bop.n = dn; 336 bop.int_cvt = true; 337 return true; 338 } 339 340 return false; 341 } 342 343 } // namespace r600_sb 344