Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #define IFC_DEBUG 0
     28 
     29 #if IFC_DEBUG
     30 #define IFC_DUMP(q) do { q } while (0)
     31 #else
     32 #define IFC_DUMP(q)
     33 #endif
     34 
     35 #include "sb_shader.h"
     36 #include "sb_pass.h"
     37 
     38 namespace r600_sb {
     39 
     40 int if_conversion::run() {
     41 
     42 	regions_vec &rv = sh.get_regions();
     43 
     44 	unsigned converted = 0;
     45 
     46 	for (regions_vec::reverse_iterator N, I = rv.rbegin(), E = rv.rend();
     47 			I != E; I = N) {
     48 		N = I; ++N;
     49 
     50 		region_node *r = *I;
     51 		if (run_on(r)) {
     52 			rv.erase(I.base() - 1);
     53 			++converted;
     54 		}
     55 	}
     56 	return 0;
     57 }
     58 
     59 void if_conversion::convert_kill_instructions(region_node *r,
     60                                               value *em, bool branch,
     61                                               container_node *c) {
     62 	value *cnd = NULL;
     63 
     64 	for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) {
     65 		N = I + 1;
     66 
     67 		if (!I->is_alu_inst())
     68 			continue;
     69 
     70 		alu_node *a = static_cast<alu_node*>(*I);
     71 		unsigned flags = a->bc.op_ptr->flags;
     72 
     73 		if (!(flags & AF_KILL))
     74 			continue;
     75 
     76 		// ignore predicated or non-const kill instructions
     77 		if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const())
     78 			continue;
     79 
     80 		literal l0 = a->src[0]->literal_value;
     81 		literal l1 = a->src[1]->literal_value;
     82 
     83 		expr_handler::apply_alu_src_mod(a->bc, 0, l0);
     84 		expr_handler::apply_alu_src_mod(a->bc, 1, l1);
     85 
     86 		if (expr_handler::evaluate_condition(flags, l0, l1)) {
     87 			// kill with constant 'true' condition, we'll convert it to the
     88 			// conditional kill outside of the if-then-else block
     89 
     90 			a->remove();
     91 
     92 			if (!cnd) {
     93 				cnd = get_select_value_for_em(sh, em);
     94 			} else {
     95 				// more than one kill with the same condition, just remove it
     96 				continue;
     97 			}
     98 
     99 			r->insert_before(a);
    100 			a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT);
    101 
    102 			a->src[0] = cnd;
    103 			a->src[1] = sh.get_const_value(0);
    104 			// clear modifiers
    105 			memset(&a->bc.src[0], 0, sizeof(bc_alu_src));
    106 			memset(&a->bc.src[1], 0, sizeof(bc_alu_src));
    107 		} else {
    108 			// kill with constant 'false' condition, this shouldn't happen
    109 			// but remove it anyway
    110 			a->remove();
    111 		}
    112 	}
    113 }
    114 
    115 bool if_conversion::check_and_convert(region_node *r) {
    116 
    117 	depart_node *nd1 = static_cast<depart_node*>(r->first);
    118 	if (!nd1->is_depart() || nd1->target != r)
    119 		return false;
    120 	if_node *nif = static_cast<if_node*>(nd1->first);
    121 	if (!nif->is_if())
    122 		return false;
    123 	depart_node *nd2 = static_cast<depart_node*>(nif->first);
    124 	if (!nd2->is_depart() || nd2->target != r)
    125 		return false;
    126 
    127 	value* &em = nif->cond;
    128 
    129 	node_stats s;
    130 
    131 	r->collect_stats(s);
    132 
    133 	IFC_DUMP(
    134 		sblog << "ifcvt: region " << r->region_id << " :\n";
    135 		s.dump();
    136 	);
    137 
    138 	if (s.region_count || s.fetch_count || s.alu_kill_count ||
    139 			s.if_count != 1 || s.repeat_count)
    140 		return false;
    141 
    142 	unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count;
    143 
    144 	// if_conversion allows to eliminate JUMP-ALU_POP_AFTER or
    145 	// JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions
    146 	// are eliminated. According to the docs, cost of CF instruction is
    147 	// equal to ~40 ALU VLIW instructions (instruction groups),
    148 	// so we have eliminated cost equal to ~120 groups in total.
    149 	// Let's also assume that we have avg 3 ALU instructions per group,
    150 	// This means that potential eliminated cost is about 360 single alu inst.
    151 	// On the other hand, we are speculatively executing conditional code now,
    152 	// so we are increasing the cost in some cases. In the worst case, we'll
    153 	// have to execute real_alu_count additional alu instructions instead of
    154 	// jumping over them. Let's assume for now that average added cost is
    155 	//
    156 	// (0.9 * real_alu_count)
    157 	//
    158 	// So we should perform if_conversion if
    159 	//
    160 	// (0.9 * real_alu_count) < 360, or
    161 	//
    162 	// real_alu_count < 400
    163 	//
    164 	// So if real_alu_count is more than 400, than we think that if_conversion
    165 	// doesn't make sense.
    166 
    167 	// FIXME: We can use more precise heuristic, taking into account sizes of
    168 	// the branches and their probability instead of total size.
    169 	// Another way to improve this is to consider the number of the groups
    170 	// instead of the number of instructions (taking into account actual VLIW
    171 	// packing).
    172 	// (Currently we don't know anything about packing at this stage, but
    173 	// probably we can make some more precise estimations anyway)
    174 
    175 	if (real_alu_count > 400)
    176 		return false;
    177 
    178 	IFC_DUMP( sblog << "if_cvt: processing...\n"; );
    179 
    180 	value *select = get_select_value_for_em(sh, em);
    181 
    182 	if (!select)
    183 		return false;
    184 
    185 	for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) {
    186 		node *n = *I;
    187 
    188 		alu_node *ns = convert_phi(select, n);
    189 
    190 		if (ns)
    191 			r->insert_after(ns);
    192 	}
    193 
    194 	nd2->expand();
    195 	nif->expand();
    196 	nd1->expand();
    197 	r->expand();
    198 
    199 	return true;
    200 }
    201 
    202 bool if_conversion::run_on(region_node* r) {
    203 
    204 	if (r->dep_count() != 2 || r->rep_count() != 1)
    205 		return false;
    206 
    207 	depart_node *nd1 = static_cast<depart_node*>(r->first);
    208 	if (!nd1->is_depart())
    209 		return false;
    210 	if_node *nif = static_cast<if_node*>(nd1->first);
    211 	if (!nif->is_if())
    212 		return false;
    213 	depart_node *nd2 = static_cast<depart_node*>(nif->first);
    214 	if (!nd2->is_depart())
    215 		return false;
    216 
    217 	value* &em = nif->cond;
    218 
    219 	convert_kill_instructions(r, em, true, nd2);
    220 	convert_kill_instructions(r, em, false, nd1);
    221 
    222 	if (check_and_convert(r))
    223 		return true;
    224 
    225 	if (nd2->empty() && nif->next) {
    226 		// empty true branch, non-empty false branch
    227 		// we'll invert it to get rid of 'else'
    228 
    229 		assert(em && em->def);
    230 
    231 		alu_node *predset = static_cast<alu_node*>(em->def);
    232 
    233 		// create clone of PREDSET instruction with inverted condition.
    234 		// PREDSET has 3 dst operands in our IR (value written to gpr,
    235 		// predicate value and exec mask value), we'll split it such that
    236 		// new PREDSET will define exec mask value only, and two others will
    237 		// be defined in the old PREDSET (if they are not used then DCE will
    238 		// simply remove old PREDSET).
    239 
    240 		alu_node *newpredset = sh.clone(predset);
    241 		predset->insert_after(newpredset);
    242 
    243 		predset->dst[2] = NULL;
    244 
    245 		newpredset->dst[0] = NULL;
    246 		newpredset->dst[1] = NULL;
    247 
    248 		em->def = newpredset;
    249 
    250 		unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK;
    251 		unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK;
    252 		bool swapargs = false;
    253 
    254 		cc = invert_setcc_condition(cc, swapargs);
    255 
    256 		if (swapargs) {
    257 			std::swap(newpredset->src[0], newpredset->src[1]);
    258 			std::swap(newpredset->bc.src[0], newpredset->bc.src[1]);
    259 		}
    260 
    261 		unsigned newopcode = get_predsetcc_op(cc, cmptype);
    262 		newpredset->bc.set_op(newopcode);
    263 
    264 		// move the code from the 'false' branch ('else') to the 'true' branch
    265 		nd2->move(nif->next, NULL);
    266 
    267 		// swap phi operands
    268 		for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E;
    269 				++I) {
    270 			node *p = *I;
    271 			assert(p->src.size() == 2);
    272 			std::swap(p->src[0], p->src[1]);
    273 		}
    274 	}
    275 
    276 	return false;
    277 }
    278 
    279 alu_node* if_conversion::convert_phi(value* select, node* phi) {
    280 	assert(phi->dst.size() == 1 || phi->src.size() == 2);
    281 
    282 	value *d = phi->dst[0];
    283 	value *v1 = phi->src[0];
    284 	value *v2 = phi->src[1];
    285 
    286 	assert(d);
    287 
    288 	if (!d->is_any_gpr())
    289 		return NULL;
    290 
    291 	if (v1->is_undef()) {
    292 		if (v2->is_undef()) {
    293 			return NULL;
    294 		} else {
    295 			return sh.create_mov(d, v2);
    296 		}
    297 	} else if (v2->is_undef())
    298 		return sh.create_mov(d, v1);
    299 
    300 	alu_node* n = sh.create_alu();
    301 
    302 	n->bc.set_op(ALU_OP3_CNDE_INT);
    303 	n->dst.push_back(d);
    304 	n->src.push_back(select);
    305 	n->src.push_back(v1);
    306 	n->src.push_back(v2);
    307 
    308 	return n;
    309 }
    310 
    311 } // namespace r600_sb
    312