Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #define RA_DEBUG 0
     28 
     29 #if RA_DEBUG
     30 #define RA_DUMP(q) do { q } while (0)
     31 #else
     32 #define RA_DUMP(q)
     33 #endif
     34 
     35 #include "sb_shader.h"
     36 #include "sb_pass.h"
     37 
     38 namespace r600_sb {
     39 
     40 int ra_coalesce::run() {
     41 	return sh.coal.run();
     42 }
     43 
     44 void coalescer::add_edge(value* a, value* b, unsigned cost) {
     45 	assert(a->is_sgpr() && b->is_sgpr());
     46 	edges.insert(new ra_edge(a,b, cost));
     47 }
     48 
     49 void coalescer::create_chunk(value *v) {
     50 
     51 	assert(v->is_sgpr());
     52 
     53 	ra_chunk *c = new ra_chunk();
     54 
     55 	c->values.push_back(v);
     56 
     57 	if (v->is_chan_pinned())
     58 		c->flags |= RCF_PIN_CHAN;
     59 	if (v->is_reg_pinned()) {
     60 		c->flags |= RCF_PIN_REG;
     61 	}
     62 
     63 	c->pin = v->pin_gpr;
     64 
     65 	RA_DUMP(
     66 		sblog << "create_chunk: ";
     67 		dump_chunk(c);
     68 	);
     69 
     70 	all_chunks.push_back(c);
     71 	v->chunk = c;
     72 
     73 }
     74 
     75 void coalescer::unify_chunks(ra_edge *e) {
     76 	ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
     77 
     78 	RA_DUMP(
     79 		sblog << "unify_chunks: ";
     80 		dump_chunk(c1);
     81 		dump_chunk(c2);
     82 	);
     83 
     84 	if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
     85 		c1->flags |= RCF_PIN_CHAN;
     86 		c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
     87 	}
     88 
     89 	if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
     90 		c1->flags |= RCF_PIN_REG;
     91 		c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
     92 	}
     93 
     94 	c1->values.reserve(c1->values.size() + c2->values.size());
     95 
     96 	for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
     97 			++I) {
     98 		(*I)->chunk = c1;
     99 		c1->values.push_back(*I);
    100 	}
    101 
    102 	chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
    103 	assert(F != all_chunks.end());
    104 
    105 	all_chunks.erase(F);
    106 
    107 	c1->cost += c2->cost + e->cost;
    108 	delete c2;
    109 }
    110 
    111 bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
    112 	unsigned pin_flags = (c1->flags & c2->flags) &
    113 			(RCF_PIN_CHAN | RCF_PIN_REG);
    114 
    115 	if ((pin_flags & RCF_PIN_CHAN) &&
    116 			c1->pin.chan() != c2->pin.chan())
    117 		return true;
    118 
    119 	if ((pin_flags & RCF_PIN_REG) &&
    120 			c1->pin.sel() != c2->pin.sel())
    121 		return true;
    122 
    123 	for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
    124 			++I) {
    125 		value *v1 = *I;
    126 
    127 		for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
    128 				++I) {
    129 			value *v2 = *I;
    130 
    131 			if (!v1->v_equal(v2) && v1->interferences.contains(v2))
    132 				return true;
    133 		}
    134 	}
    135 	return false;
    136 }
    137 
    138 void coalescer::build_chunks() {
    139 
    140 	for (edge_queue::iterator I = edges.begin(), E = edges.end();
    141 			I != E; ++I) {
    142 
    143 		ra_edge *e = *I;
    144 
    145 		if (!e->a->chunk)
    146 			create_chunk(e->a);
    147 
    148 		if (!e->b->chunk)
    149 			create_chunk(e->b);
    150 
    151 		ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
    152 
    153 		if (c1 == c2) {
    154 			c1->cost += e->cost;
    155 		} else if (!chunks_interference(c1, c2))
    156 			unify_chunks(e);
    157 	}
    158 }
    159 
    160 ra_constraint* coalescer::create_constraint(constraint_kind kind) {
    161 	ra_constraint *c = new ra_constraint(kind);
    162 	all_constraints.push_back(c);
    163 	return c;
    164 }
    165 
    166 void coalescer::dump_edges() {
    167 	sblog << "######## affinity edges\n";
    168 
    169 	for (edge_queue::iterator I = edges.begin(), E = edges.end();
    170 			I != E; ++I) {
    171 		ra_edge* e = *I;
    172 		sblog << "  ra_edge ";
    173 		dump::dump_val(e->a);
    174 		sblog << " <-> ";
    175 		dump::dump_val(e->b);
    176 		sblog << "   cost = " << e->cost << "\n";
    177 	}
    178 }
    179 
    180 void coalescer::dump_chunks() {
    181 	sblog << "######## chunks\n";
    182 
    183 	for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
    184 			I != E; ++I) {
    185 		ra_chunk* c = *I;
    186 		dump_chunk(c);
    187 	}
    188 }
    189 
    190 
    191 void coalescer::dump_constraint_queue() {
    192 	sblog << "######## constraints\n";
    193 
    194 	for (constraint_queue::iterator I = constraints.begin(),
    195 			E = constraints.end(); I != E; ++I) {
    196 		ra_constraint* c = *I;
    197 		dump_constraint(c);
    198 	}
    199 }
    200 
    201 void coalescer::dump_chunk(ra_chunk* c) {
    202 	sblog << "  ra_chunk cost = " << c->cost << "  :  ";
    203 	dump::dump_vec(c->values);
    204 
    205 	if (c->flags & RCF_PIN_REG)
    206 		sblog << "   REG = " << c->pin.sel();
    207 
    208 	if (c->flags & RCF_PIN_CHAN)
    209 		sblog << "   CHAN = " << c->pin.chan();
    210 
    211 	sblog << (c->flags & RCF_GLOBAL ? "  GLOBAL" : "");
    212 
    213 	sblog << "\n";
    214 }
    215 
    216 void coalescer::dump_constraint(ra_constraint* c) {
    217 	sblog << "  ra_constraint: ";
    218 	switch (c->kind) {
    219 		case CK_PACKED_BS: sblog << "PACKED_BS"; break;
    220 		case CK_PHI: sblog << "PHI"; break;
    221 		case CK_SAME_REG: sblog << "SAME_REG"; break;
    222 		default: sblog << "UNKNOWN_KIND"; assert(0); break;
    223 	}
    224 
    225 	sblog << "  cost = " << c->cost << "  : ";
    226 	dump::dump_vec(c->values);
    227 
    228 	sblog << "\n";
    229 }
    230 
    231 void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
    232 
    233 	for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
    234 			++I) {
    235 		value *v = *I;
    236 		s.add_set(v->interferences);
    237 	}
    238 	s.remove_vec(c->values);
    239 }
    240 
    241 void coalescer::build_chunk_queue() {
    242 	for (chunk_vec::iterator I = all_chunks.begin(),
    243 			E = all_chunks.end(); I != E; ++I) {
    244 		ra_chunk *c = *I;
    245 
    246 		if (!c->is_fixed())
    247 			chunks.insert(c);
    248 	}
    249 }
    250 
    251 void coalescer::build_constraint_queue() {
    252 	for (constraint_vec::iterator I = all_constraints.begin(),
    253 			E = all_constraints.end(); I != E; ++I) {
    254 		ra_constraint *c = *I;
    255 		unsigned cost = 0;
    256 
    257 		if (c->values.empty() || !c->values.front()->is_sgpr())
    258 			continue;
    259 
    260 		if (c->kind != CK_SAME_REG)
    261 			continue;
    262 
    263 		for (vvec::iterator I = c->values.begin(), E = c->values.end();
    264 				I != E; ++I) {
    265 			value *v = *I;
    266 			if (!v->chunk)
    267 				create_chunk(v);
    268 			else
    269 				cost += v->chunk->cost;
    270 		}
    271 		c->cost = cost;
    272 		constraints.insert(c);
    273 	}
    274 }
    275 
    276 void coalescer::color_chunks() {
    277 
    278 	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
    279 			I != E; ++I) {
    280 		ra_chunk *c = *I;
    281 		if (c->is_fixed() || c->values.size() == 1)
    282 			continue;
    283 
    284 		sb_bitset rb;
    285 		val_set interf;
    286 
    287 		get_chunk_interferences(c, interf);
    288 
    289 		RA_DUMP(
    290 			sblog << "color_chunks: ";
    291 			dump_chunk(c);
    292 			sblog << "\n interferences: ";
    293 			dump::dump_set(sh,interf);
    294 			sblog << "\n";
    295 		);
    296 
    297 		init_reg_bitset(rb, interf);
    298 
    299 		unsigned pass = c->is_reg_pinned() ? 0 : 1;
    300 
    301 		unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
    302 		unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;
    303 
    304 		unsigned color = 0;
    305 
    306 		while (pass < 2) {
    307 
    308 			unsigned rs, re;
    309 
    310 			if (pass == 0) {
    311 				rs = c->pin.sel();
    312 				re = rs + 1;
    313 			} else {
    314 				rs = 0;
    315 				re = sh.num_nontemp_gpr();
    316 			}
    317 
    318 			for (unsigned reg = rs; reg < re; ++reg) {
    319 				for (unsigned chan = cs; chan < ce; ++chan) {
    320 					unsigned bit = sel_chan(reg, chan);
    321 					if (bit >= rb.size() || !rb.get(bit)) {
    322 						color = bit;
    323 						break;
    324 					}
    325 				}
    326 				if (color)
    327 					break;
    328 			}
    329 
    330 			if (color)
    331 				break;
    332 
    333 			++pass;
    334 		}
    335 
    336 		assert(color);
    337 		color_chunk(c, color);
    338 	}
    339 }
    340 
    341 void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
    342 
    343 	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
    344 		value *v = *I;
    345 
    346 		if (!v->is_any_gpr())
    347 			continue;
    348 
    349 		unsigned gpr = v->get_final_gpr();
    350 		if (!gpr)
    351 			continue;
    352 
    353 		if (gpr) {
    354 			if (gpr >= bs.size())
    355 				bs.resize(gpr + 64);
    356 			bs.set(gpr, 1);
    357 		}
    358 	}
    359 }
    360 
    361 void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
    362 
    363 	vvec vv = c->values;
    364 
    365 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
    366 			++I) {
    367 		value *v = *I;
    368 
    369 		if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
    370 			detach_value(v);
    371 			continue;
    372 		}
    373 
    374 		if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
    375 			detach_value(v);
    376 			continue;
    377 		}
    378 
    379 		v->gpr = color;
    380 
    381 		if (v->constraint && v->constraint->kind == CK_PHI)
    382 			v->fix();
    383 
    384 
    385 		RA_DUMP(
    386 			sblog << " assigned " << color << " to ";
    387 			dump::dump_val(v);
    388 			sblog << "\n";
    389 		);
    390 	}
    391 
    392 	c->pin = color;
    393 
    394 	if (c->is_reg_pinned()) {
    395 		c->fix();
    396 	}
    397 }
    398 
    399 coalescer::~coalescer() {
    400 
    401 	// FIXME use pool allocator ??
    402 
    403 	for (constraint_vec::iterator I = all_constraints.begin(),
    404 			E = all_constraints.end(); I != E; ++I) {
    405 		delete (*I);
    406 	}
    407 
    408 	for (chunk_vec::iterator I = all_chunks.begin(),
    409 			E = all_chunks.end(); I != E; ++I) {
    410 		delete (*I);
    411 	}
    412 
    413 	for (edge_queue::iterator I = edges.begin(), E = edges.end();
    414 			I != E; ++I) {
    415 		delete (*I);
    416 	}
    417 }
    418 
    419 int coalescer::run() {
    420 	int r;
    421 
    422 	RA_DUMP( dump_edges(); );
    423 
    424 	build_chunks();
    425 	RA_DUMP( dump_chunks(); );
    426 
    427 	build_constraint_queue();
    428 	RA_DUMP( dump_constraint_queue(); );
    429 
    430 	if ((r = color_constraints()))
    431 		return r;
    432 
    433 	build_chunk_queue();
    434 	color_chunks();
    435 
    436 	return 0;
    437 }
    438 
    439 void coalescer::color_phi_constraint(ra_constraint* c) {
    440 }
    441 
    442 ra_chunk* coalescer::detach_value(value *v) {
    443 
    444 	vvec::iterator F = std::find(v->chunk->values.begin(),
    445 	                             v->chunk->values.end(), v);
    446 
    447 	assert(F != v->chunk->values.end());
    448 	v->chunk->values.erase(F);
    449 	create_chunk(v);
    450 
    451 	if (v->is_reg_pinned()) {
    452 		v->chunk->fix();
    453 	}
    454 
    455 	RA_DUMP(
    456 		sblog << "           detached : ";
    457 		dump_chunk(v->chunk);
    458 	);
    459 
    460 	return v->chunk;
    461 
    462 }
    463 
    464 int coalescer::color_reg_constraint(ra_constraint *c) {
    465 	unsigned k, cnt = c->values.size();
    466 	vvec & cv = c->values;
    467 
    468 	ra_chunk *ch[4];
    469 	unsigned swz[4] = {0, 1, 2, 3};
    470 	val_set interf[4];
    471 	sb_bitset rb[4];
    472 
    473 	bool reg_pinned = false;
    474 	unsigned pin_reg = ~0;
    475 
    476 	unsigned chan_mask = 0;
    477 
    478 	k = 0;
    479 	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
    480 		value *v = *I;
    481 
    482 		if (!v->chunk)
    483 			create_chunk(v);
    484 
    485 		ch[k] = v->chunk;
    486 
    487 		if (v->chunk->is_chan_pinned()) {
    488 			unsigned chan = 1 << v->chunk->pin.chan();
    489 
    490 			if (chan & chan_mask) { // channel already in use
    491 				ch[k] = detach_value(v);
    492 				assert(!ch[k]->is_chan_pinned());
    493 			} else {
    494 				chan_mask |= chan;
    495 			}
    496 		}
    497 
    498 		if (v->chunk->is_reg_pinned()) {
    499 			if (!reg_pinned) {
    500 				reg_pinned = true;
    501 				pin_reg = v->chunk->pin.sel();
    502 			}
    503 		}
    504 
    505 		get_chunk_interferences(ch[k], interf[k]);
    506 		init_reg_bitset(rb[k], interf[k]);
    507 	}
    508 
    509 	unsigned start_reg, end_reg;
    510 
    511 	start_reg = 0;
    512 	end_reg = sh.num_nontemp_gpr();
    513 
    514 	unsigned min_reg = end_reg;
    515 	unsigned min_swz[4];
    516 	unsigned i, pass = reg_pinned ? 0 : 1;
    517 
    518 	bool done = false;
    519 
    520 	while (pass < 2) {
    521 
    522 		unsigned rs, re;
    523 
    524 		if (pass == 0) {
    525 			re = pin_reg + 1;
    526 			rs = pin_reg;
    527 		} else {
    528 			re = end_reg;
    529 			rs = start_reg;
    530 		}
    531 
    532 		min_reg = re;
    533 
    534 		// cycle on swizzle combinations
    535 		do {
    536 			for (i = 0; i < cnt; ++i) {
    537 				if (ch[i]->flags & RCF_PIN_CHAN)
    538 					if (ch[i]->pin.chan() != swz[i])
    539 						break;
    540 			}
    541 			if (i != cnt)
    542 				continue;
    543 
    544 			// looking for minimal reg number such that the constrained chunks
    545 			// may be colored with the current swizzle combination
    546 			for (unsigned reg = rs; reg < min_reg; ++reg) {
    547 				for (i = 0; i < cnt; ++i) {
    548 					unsigned bit = sel_chan(reg, swz[i]);
    549 					if (bit < rb[i].size() && rb[i].get(bit))
    550 						break;
    551 				}
    552 				if (i == cnt) {
    553 					done = true;
    554 					min_reg = reg;
    555 					std::copy(swz, swz + 4, min_swz);
    556 					break;
    557 				}
    558 			}
    559 
    560 			if (pass == 0 && done)
    561 				break;
    562 
    563 		} while (std::next_permutation(swz, swz + 4));
    564 
    565 		if (!done && pass) {
    566 			sblog << "sb: ra_coalesce - out of registers\n";
    567 			return -1;
    568 		}
    569 
    570 		if (pass == 0 && done)
    571 			break;
    572 
    573 		++pass;
    574 	};
    575 
    576 	assert(done);
    577 
    578 	RA_DUMP(
    579 	sblog << "min reg = " << min_reg << "   min_swz = "
    580 			<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
    581 	);
    582 
    583 	for (i = 0; i < cnt; ++i) {
    584 		sel_chan color(min_reg, min_swz[i]);
    585 		ra_chunk *cc = ch[i];
    586 
    587 		if (cc->is_fixed()) {
    588 			if (cc->pin != color)
    589 				cc = detach_value(cv[i]);
    590 			else
    591 				continue;
    592 		}
    593 
    594 		color_chunk(cc, color);
    595 		cc->fix();
    596 		cc->set_prealloc();
    597 	}
    598 
    599 	return 0;
    600 }
    601 
    602 int coalescer::color_constraints() {
    603 	int r;
    604 
    605 	for (constraint_queue::iterator I = constraints.begin(),
    606 			E = constraints.end(); I != E; ++I) {
    607 
    608 		ra_constraint *c = *I;
    609 
    610 		RA_DUMP(
    611 			sblog << "color_constraints: ";
    612 			dump_constraint(c);
    613 		);
    614 
    615 		if (c->kind == CK_SAME_REG) {
    616 			if ((r = color_reg_constraint(c)))
    617 				return r;
    618 		} else if (c->kind == CK_PHI)
    619 			color_phi_constraint(c);
    620 	}
    621 	return 0;
    622 }
    623 
    624 } // namespace r600_sb
    625