Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #define PPH_DEBUG 0
     28 
     29 #if PPH_DEBUG
     30 #define PPH_DUMP(q) do { q } while (0)
     31 #else
     32 #define PPH_DUMP(q)
     33 #endif
     34 
     35 #include "sb_shader.h"
     36 #include "sb_pass.h"
     37 
     38 namespace r600_sb {
     39 
     40 int peephole::run() {
     41 
     42 	run_on(sh.root);
     43 
     44 	return 0;
     45 }
     46 
     47 void peephole::run_on(container_node* c) {
     48 
     49 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
     50 		node *n = *I;
     51 
     52 		if (n->is_container())
     53 			run_on(static_cast<container_node*>(n));
     54 		else {
     55 
     56 			if (n->is_alu_inst()) {
     57 				alu_node *a = static_cast<alu_node*>(n);
     58 
     59 				if (a->bc.op_ptr->flags &
     60 						(AF_PRED | AF_SET | AF_CMOV | AF_KILL)) {
     61 					optimize_cc_op(a);
     62 				} else if (a->bc.op == ALU_OP1_FLT_TO_INT) {
     63 
     64 					alu_node *s = a;
     65 					if (get_bool_flt_to_int_source(s)) {
     66 						convert_float_setcc(a, s);
     67 					}
     68 				}
     69 			}
     70 		}
     71 	}
     72 }
     73 
     74 void peephole::optimize_cc_op(alu_node* a) {
     75 	unsigned aflags = a->bc.op_ptr->flags;
     76 
     77 	if (aflags & (AF_PRED | AF_SET | AF_KILL)) {
     78 		optimize_cc_op2(a);
     79 	} else if (aflags & AF_CMOV) {
     80 		optimize_CNDcc_op(a);
     81 	}
     82 }
     83 
     84 void peephole::convert_float_setcc(alu_node *f2i, alu_node *s) {
     85 	alu_node *ns = sh.clone(s);
     86 
     87 	ns->dst[0] = f2i->dst[0];
     88 	ns->dst[0]->def = ns;
     89 	ns->bc.set_op(ns->bc.op + (ALU_OP2_SETE_DX10 - ALU_OP2_SETE));
     90 	f2i->insert_after(ns);
     91 	f2i->remove();
     92 }
     93 
     94 void peephole::optimize_cc_op2(alu_node* a) {
     95 
     96 	unsigned flags = a->bc.op_ptr->flags;
     97 	unsigned cc = flags & AF_CC_MASK;
     98 
     99 	if ((cc != AF_CC_E && cc != AF_CC_NE) || a->pred)
    100 		return;
    101 
    102 	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
    103 	unsigned dst_type = flags & AF_DST_TYPE_MASK;
    104 
    105 	int op_kind = (flags & AF_PRED) ? 1 :
    106 			(flags & AF_SET) ? 2 :
    107 			(flags & AF_KILL) ? 3 : 0;
    108 
    109 	bool swapped = false;
    110 
    111 	if (a->src[0]->is_const() && a->src[0]->literal_value == literal(0)) {
    112 		std::swap(a->src[0],a->src[1]);
    113 		swapped = true;
    114 		// clear modifiers
    115 		memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
    116 		memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
    117 	}
    118 
    119 	if (swapped || (a->src[1]->is_const() &&
    120 			a->src[1]->literal_value == literal(0))) {
    121 
    122 		value *s = a->src[0];
    123 
    124 		bool_op_info bop = {};
    125 
    126 		PPH_DUMP(
    127 			sblog << "cc_op2: ";
    128 			dump::dump_op(a);
    129 			sblog << "\n";
    130 		);
    131 
    132 		if (!get_bool_op_info(s, bop))
    133 			return;
    134 
    135 		if (cc == AF_CC_E)
    136 			bop.invert = !bop.invert;
    137 
    138 		bool swap_args = false;
    139 
    140 		cc = bop.n->bc.op_ptr->flags & AF_CC_MASK;
    141 
    142 		if (bop.invert)
    143 			cc = invert_setcc_condition(cc, swap_args);
    144 
    145 		if (bop.int_cvt) {
    146 			assert(cmp_type != AF_FLOAT_CMP);
    147 			cmp_type = AF_FLOAT_CMP;
    148 		}
    149 
    150 		PPH_DUMP(
    151 			sblog << "boi node: ";
    152 			dump::dump_op(bop.n);
    153 			sblog << " invert: " << bop.invert << "  int_cvt: " << bop.int_cvt;
    154 			sblog <<"\n";
    155 		);
    156 
    157 		unsigned newop;
    158 
    159 		switch(op_kind) {
    160 		case 1:
    161 			newop = get_predsetcc_op(cc, cmp_type);
    162 			break;
    163 		case 2:
    164 			newop = get_setcc_op(cc, cmp_type, dst_type != AF_FLOAT_DST);
    165 			break;
    166 		case 3:
    167 			newop = get_killcc_op(cc, cmp_type);
    168 			break;
    169 		default:
    170 			newop = ALU_OP0_NOP;
    171 			assert(!"invalid op kind");
    172 			break;
    173 		}
    174 
    175 		a->bc.set_op(newop);
    176 
    177 		if (swap_args) {
    178 			a->src[0] = bop.n->src[1];
    179 			a->src[1] = bop.n->src[0];
    180 			a->bc.src[0] = bop.n->bc.src[1];
    181 			a->bc.src[1] = bop.n->bc.src[0];
    182 
    183 		} else {
    184 			a->src[0] = bop.n->src[0];
    185 			a->src[1] = bop.n->src[1];
    186 			a->bc.src[0] = bop.n->bc.src[0];
    187 			a->bc.src[1] = bop.n->bc.src[1];
    188 		}
    189 	}
    190 }
    191 
    192 void peephole::optimize_CNDcc_op(alu_node* a) {
    193 	unsigned flags = a->bc.op_ptr->flags;
    194 	unsigned cc = flags & AF_CC_MASK;
    195 	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
    196 	bool swap = false;
    197 
    198 	if (cc == AF_CC_E) {
    199 		swap = !swap;
    200 		cc = AF_CC_NE;
    201 	} else if (cc != AF_CC_NE)
    202 		return;
    203 
    204 	value *s = a->src[0];
    205 
    206 	bool_op_info bop = {};
    207 
    208 	PPH_DUMP(
    209 		sblog << "cndcc: ";
    210 		dump::dump_op(a);
    211 		sblog << "\n";
    212 	);
    213 
    214 	if (!get_bool_op_info(s, bop))
    215 		return;
    216 
    217 	alu_node *d = bop.n;
    218 
    219 	if (d->bc.omod)
    220 		return;
    221 
    222 	PPH_DUMP(
    223 		sblog << "cndcc def: ";
    224 		dump::dump_op(d);
    225 		sblog << "\n";
    226 	);
    227 
    228 
    229 	unsigned dflags = d->bc.op_ptr->flags;
    230 	unsigned dcc = dflags & AF_CC_MASK;
    231 	unsigned dcmp_type = dflags & AF_CMP_TYPE_MASK;
    232 	unsigned ddst_type = dflags & AF_DST_TYPE_MASK;
    233 	int nds;
    234 
    235 	// TODO we can handle some of these cases,
    236 	// though probably this shouldn't happen
    237 	if (cmp_type != AF_FLOAT_CMP && ddst_type == AF_FLOAT_DST)
    238 		return;
    239 
    240 	if (d->src[0]->is_const() && d->src[0]->literal_value == literal(0))
    241 		nds = 1;
    242 	else if ((d->src[1]->is_const() &&
    243 			d->src[1]->literal_value == literal(0)))
    244 		nds = 0;
    245 	else
    246 		return;
    247 
    248 	// can't propagate ABS modifier to CNDcc because it's OP3
    249 	if (d->bc.src[nds].abs)
    250 		return;
    251 
    252 	// TODO we can handle some cases for uint comparison
    253 	if (dcmp_type == AF_UINT_CMP)
    254 		return;
    255 
    256 	if (dcc == AF_CC_NE) {
    257 		dcc = AF_CC_E;
    258 		swap = !swap;
    259 	}
    260 
    261 	if (nds == 1) {
    262 		switch (dcc) {
    263 		case AF_CC_GT: dcc = AF_CC_GE; swap = !swap; break;
    264 		case AF_CC_GE: dcc = AF_CC_GT; swap = !swap; break;
    265 		default: break;
    266 		}
    267 	}
    268 
    269 	a->src[0] = d->src[nds];
    270 	a->bc.src[0] = d->bc.src[nds];
    271 
    272 	if (swap) {
    273 		std::swap(a->src[1], a->src[2]);
    274 		std::swap(a->bc.src[1], a->bc.src[2]);
    275 	}
    276 
    277 	a->bc.set_op(get_cndcc_op(dcc, dcmp_type));
    278 
    279 }
    280 
    281 bool peephole::get_bool_flt_to_int_source(alu_node* &a) {
    282 
    283 	if (a->bc.op == ALU_OP1_FLT_TO_INT) {
    284 
    285 		if (a->bc.src[0].neg || a->bc.src[0].abs || a->bc.src[0].rel)
    286 			return false;
    287 
    288 		value *s = a->src[0];
    289 		if (!s || !s->def || !s->def->is_alu_inst())
    290 			return false;
    291 
    292 		alu_node *dn = static_cast<alu_node*>(s->def);
    293 
    294 		if (dn->is_alu_op(ALU_OP1_TRUNC)) {
    295 			s = dn->src[0];
    296 			if (!s || !s->def || !s->def->is_alu_inst())
    297 				return false;
    298 
    299 			if (dn->bc.src[0].neg != 1 || dn->bc.src[0].abs != 0 ||
    300 					dn->bc.src[0].rel != 0) {
    301 				return false;
    302 			}
    303 
    304 			dn = static_cast<alu_node*>(s->def);
    305 
    306 		}
    307 
    308 		if (dn->bc.op_ptr->flags & AF_SET) {
    309 			a = dn;
    310 			return true;
    311 		}
    312 	}
    313 	return false;
    314 }
    315 
    316 bool peephole::get_bool_op_info(value* b, bool_op_info& bop) {
    317 
    318 	node *d = b->def;
    319 
    320 	if (!d || !d->is_alu_inst())
    321 		return false;
    322 
    323 	alu_node *dn = static_cast<alu_node*>(d);
    324 
    325 	if (dn->bc.op_ptr->flags & AF_SET) {
    326 		bop.n = dn;
    327 
    328 		if (dn->bc.op_ptr->flags & AF_DX10)
    329 			bop.int_cvt = true;
    330 
    331 		return true;
    332 	}
    333 
    334 	if (get_bool_flt_to_int_source(dn)) {
    335 		bop.n = dn;
    336 		bop.int_cvt = true;
    337 		return true;
    338 	}
    339 
    340 	return false;
    341 }
    342 
    343 } // namespace r600_sb
    344