Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #define RA_DEBUG 0
     28 
     29 #if RA_DEBUG
     30 #define RA_DUMP(q) do { q } while (0)
     31 #else
     32 #define RA_DUMP(q)
     33 #endif
     34 
     35 #include <cstring>
     36 
     37 #include "sb_bc.h"
     38 #include "sb_shader.h"
     39 #include "sb_pass.h"
     40 
     41 namespace r600_sb {
     42 
     43 class regbits {
     44 	typedef uint32_t basetype;
     45 	static const unsigned bt_bytes = sizeof(basetype);
     46 	static const unsigned bt_index_shift = 5;
     47 	static const unsigned bt_index_mask = (1u << bt_index_shift) - 1;
     48 	static const unsigned bt_bits = bt_bytes << 3;
     49 	static const unsigned size = MAX_GPR * 4 / bt_bits;
     50 
     51 	basetype dta[size];
     52 
     53 	unsigned num_temps;
     54 
     55 public:
     56 
     57 	regbits(unsigned num_temps) : dta(), num_temps(num_temps) {}
     58 	regbits(unsigned num_temps, unsigned value)	: num_temps(num_temps)
     59 	{ set_all(value); }
     60 
     61 	regbits(shader &sh, val_set &vs) : num_temps(sh.get_ctx().alu_temp_gprs)
     62 	{ set_all(1); from_val_set(sh, vs); }
     63 
     64 	void set_all(unsigned val);
     65 	void from_val_set(shader &sh, val_set &vs);
     66 
     67 	void set(unsigned index);
     68 	void clear(unsigned index);
     69 	bool get(unsigned index);
     70 
     71 	void set(unsigned index, unsigned val);
     72 
     73 	sel_chan find_free_bit();
     74 	sel_chan find_free_chans(unsigned mask);
     75 	sel_chan find_free_chan_by_mask(unsigned mask);
     76 	sel_chan find_free_array(unsigned size, unsigned mask);
     77 
     78 	void dump();
     79 };
     80 
     81 // =======================================
     82 
     83 void regbits::dump() {
     84 	for (unsigned i = 0; i < size * bt_bits; ++i) {
     85 
     86 		if (!(i & 31))
     87 			sblog << "\n";
     88 
     89 		if (!(i & 3)) {
     90 			sblog.print_w(i / 4, 7);
     91 			sblog << " ";
     92 		}
     93 
     94 		sblog << (get(i) ? 1 : 0);
     95 	}
     96 }
     97 
     98 
     99 void regbits::set_all(unsigned v) {
    100 	memset(&dta, v ? 0xFF : 0x00, size * bt_bytes);
    101 }
    102 
    103 void regbits::from_val_set(shader &sh, val_set& vs) {
    104 	val_set &s = vs;
    105 	unsigned g;
    106 	for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
    107 		value *v = *I;
    108 		if (v->is_any_gpr()) {
    109 			g = v->get_final_gpr();
    110 			if (!g)
    111 				continue;
    112 		} else
    113 			continue;
    114 
    115 		assert(g);
    116 		--g;
    117 		assert(g < 512);
    118 		clear(g);
    119 	}
    120 }
    121 
    122 void regbits::set(unsigned index) {
    123 	unsigned ih = index >> bt_index_shift;
    124 	unsigned il = index & bt_index_mask;
    125 	dta[ih] |= ((basetype)1u << il);
    126 }
    127 
    128 void regbits::clear(unsigned index) {
    129 	unsigned ih = index >> bt_index_shift;
    130 	unsigned il = index & bt_index_mask;
    131 	assert(ih < size);
    132 	dta[ih] &= ~((basetype)1u << il);
    133 }
    134 
    135 bool regbits::get(unsigned index) {
    136 	unsigned ih = index >> bt_index_shift;
    137 	unsigned il = index & bt_index_mask;
    138 	return dta[ih] & ((basetype)1u << il);
    139 }
    140 
    141 void regbits::set(unsigned index, unsigned val) {
    142 	unsigned ih = index >> bt_index_shift;
    143 	unsigned il = index & bt_index_mask;
    144 	basetype bm = 1u << il;
    145 	dta[ih] = (dta[ih] & ~bm) | (val << il);
    146 }
    147 
    148 // free register for ra means the bit is set
    149 sel_chan regbits::find_free_bit() {
    150 	unsigned elt = 0;
    151 	unsigned bit = 0;
    152 
    153 	while (elt < size && !dta[elt])
    154 		++elt;
    155 
    156 	if (elt >= size)
    157 		return 0;
    158 
    159 	bit = __builtin_ctz(dta[elt]) + (elt << bt_index_shift);
    160 
    161 	assert(bit < ((MAX_GPR - num_temps) << 2));
    162 
    163 	return bit + 1;
    164 }
    165 
    166 // find free gpr component to use as indirectly addressable array
    167 sel_chan regbits::find_free_array(unsigned length, unsigned mask) {
    168 	unsigned cc[4] = {};
    169 
    170 	// FIXME optimize this. though hopefully we won't have a lot of arrays
    171 	for (unsigned a = 0; a < MAX_GPR - num_temps; ++a) {
    172 		for(unsigned c = 0; c < MAX_CHAN; ++c) {
    173 			if (mask & (1 << c)) {
    174 				if (get((a << 2) | c)) {
    175 					if (++cc[c] == length)
    176 						return sel_chan(a - length + 1, c);
    177 				} else {
    178 					cc[c] = 0;
    179 				}
    180 			}
    181 		}
    182 	}
    183 	return 0;
    184 }
    185 
    186 sel_chan regbits::find_free_chans(unsigned mask) {
    187 	unsigned elt = 0;
    188 	unsigned bit = 0;
    189 
    190 	assert (!(mask & ~0xF));
    191 	basetype cd = dta[elt];
    192 
    193 	do {
    194 		if (!cd) {
    195 			if (++elt < size) {
    196 				cd = dta[elt];
    197 				bit = 0;
    198 				continue;
    199 			} else
    200 				return 0;
    201 		}
    202 
    203 		unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
    204 
    205 		assert (p <= bt_bits - bit);
    206 		bit += p;
    207 		cd >>= p;
    208 
    209 		if ((cd & mask) == mask) {
    210 			return ((elt << bt_index_shift) | bit) + 1;
    211 		}
    212 
    213 		bit += 4;
    214 		cd >>= 4;
    215 
    216 	} while (1);
    217 
    218 	return 0;
    219 }
    220 
    221 sel_chan regbits::find_free_chan_by_mask(unsigned mask) {
    222 	unsigned elt = 0;
    223 	unsigned bit = 0;
    224 
    225 	assert (!(mask & ~0xF));
    226 	basetype cd = dta[elt];
    227 
    228 	do {
    229 		if (!cd) {
    230 			if (++elt < size) {
    231 				cd = dta[elt];
    232 				bit = 0;
    233 				continue;
    234 			} else
    235 				return 0;
    236 		}
    237 
    238 		unsigned p = __builtin_ctz(cd) & ~(basetype)3u;
    239 
    240 		assert (p <= bt_bits - bit);
    241 		bit += p;
    242 		cd >>= p;
    243 
    244 		if (cd & mask) {
    245 			unsigned nb = __builtin_ctz(cd & mask);
    246 			unsigned ofs = ((elt << bt_index_shift) | bit);
    247 			return nb + ofs + 1;
    248 		}
    249 
    250 		bit += 4;
    251 		cd >>= 4;
    252 
    253 	} while (1);
    254 
    255 	return 0;
    256 }
    257 
    258 // ================================
    259 
    260 void ra_init::alloc_arrays() {
    261 
    262 	gpr_array_vec &ga = sh.arrays();
    263 
    264 	for(gpr_array_vec::iterator I = ga.begin(), E = ga.end(); I != E; ++I) {
    265 		gpr_array *a = *I;
    266 
    267 		RA_DUMP(
    268 			sblog << "array [" << a->array_size << "] at " << a->base_gpr << "\n";
    269 			sblog << "\n";
    270 		);
    271 
    272 		// skip preallocated arrays (e.g. with preloaded inputs)
    273 		if (a->gpr) {
    274 			RA_DUMP( sblog << "   FIXED at " << a->gpr << "\n"; );
    275 			continue;
    276 		}
    277 
    278 		bool dead = a->is_dead();
    279 
    280 		if (dead) {
    281 			RA_DUMP( sblog << "   DEAD\n"; );
    282 			continue;
    283 		}
    284 
    285 		val_set &s = a->interferences;
    286 
    287 
    288 		for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
    289 			value *v = *I;
    290 			if (v->array == a)
    291 				s.remove_val(v);
    292 		}
    293 
    294 		RA_DUMP(
    295 			sblog << "  interf: ";
    296 			dump::dump_set(sh, s);
    297 			sblog << "\n";
    298 		);
    299 
    300 		regbits rb(sh, s);
    301 
    302 		sel_chan base = rb.find_free_array(a->array_size,
    303 		                                   (1 << a->base_gpr.chan()));
    304 
    305 		RA_DUMP( sblog << "  found base: " << base << "\n"; );
    306 
    307 		a->gpr = base;
    308 	}
    309 }
    310 
    311 
    312 int ra_init::run() {
    313 
    314 	alloc_arrays();
    315 
    316 	ra_node(sh.root);
    317 	return 0;
    318 }
    319 
    320 void ra_init::ra_node(container_node* c) {
    321 
    322 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
    323 		node *n = *I;
    324 		if (n->type == NT_OP) {
    325 			process_op(n);
    326 		}
    327 		if (n->is_container() && !n->is_alu_packed()) {
    328 			ra_node(static_cast<container_node*>(n));
    329 		}
    330 	}
    331 }
    332 
    333 void ra_init::process_op(node* n) {
    334 
    335 	bool copy = n->is_copy_mov();
    336 
    337 	RA_DUMP(
    338 		sblog << "ra_init: process_op : ";
    339 		dump::dump_op(n);
    340 		sblog << "\n";
    341 	);
    342 
    343 	if (n->is_alu_packed()) {
    344 		for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
    345 			value *v = *I;
    346 			if (v && v->is_sgpr() && v->constraint &&
    347 					v->constraint->kind == CK_PACKED_BS) {
    348 				color_bs_constraint(v->constraint);
    349 				break;
    350 			}
    351 		}
    352 	}
    353 
    354 	if (n->is_fetch_inst() || n->is_cf_inst()) {
    355 		for (vvec::iterator I = n->src.begin(), E = n->src.end(); I != E; ++I) {
    356 			value *v = *I;
    357 			if (v && v->is_sgpr())
    358 				color(v);
    359 		}
    360 	}
    361 
    362 	for (vvec::iterator I = n->dst.begin(), E = n->dst.end(); I != E; ++I) {
    363 		value *v = *I;
    364 		if (!v)
    365 			continue;
    366 		if (v->is_sgpr()) {
    367 			if (!v->gpr) {
    368 				if (copy && !v->constraint) {
    369 					value *s = *(n->src.begin() + (I - n->dst.begin()));
    370 					assert(s);
    371 					if (s->is_sgpr()) {
    372 						assign_color(v, s->gpr);
    373 					}
    374 				} else
    375 					color(v);
    376 			}
    377 		}
    378 	}
    379 }
    380 
    381 void ra_init::color_bs_constraint(ra_constraint* c) {
    382 	vvec &vv = c->values;
    383 	assert(vv.size() <= 8);
    384 
    385 	RA_DUMP(
    386 		sblog << "color_bs_constraint: ";
    387 		dump::dump_vec(vv);
    388 		sblog << "\n";
    389 	);
    390 
    391 	regbits rb(ctx.alu_temp_gprs);
    392 
    393 	unsigned chan_count[4] = {};
    394 	unsigned allowed_chans = 0x0F;
    395 
    396 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
    397 		value *v = *I;
    398 
    399 		if (!v || v->is_dead())
    400 			continue;
    401 
    402 		sel_chan gpr = v->get_final_gpr();
    403 
    404 		val_set interf;
    405 
    406 		if (v->chunk)
    407 			sh.coal.get_chunk_interferences(v->chunk, interf);
    408 		else
    409 			interf = v->interferences;
    410 
    411 		RA_DUMP(
    412 			sblog << "   processing " << *v << "  interferences : ";
    413 			dump::dump_set(sh, interf);
    414 			sblog << "\n";
    415 		);
    416 
    417 		if (gpr) {
    418 			unsigned chan = gpr.chan();
    419 			if (chan_count[chan] < 3) {
    420 				++chan_count[chan];
    421 				continue;
    422 			} else {
    423 				v->flags &= ~VLF_FIXED;
    424 				allowed_chans &= ~(1 << chan);
    425 				assert(allowed_chans);
    426 			}
    427 		}
    428 
    429 		v->gpr = 0;
    430 
    431 		gpr = 1;
    432 		rb.set_all(1);
    433 
    434 
    435 		rb.from_val_set(sh, interf);
    436 
    437 		RA_DUMP(
    438 			sblog << "   regbits : ";
    439 			rb.dump();
    440 			sblog << "\n";
    441 		);
    442 
    443 		while (allowed_chans && gpr.sel() < sh.num_nontemp_gpr()) {
    444 
    445 			while (rb.get(gpr - 1) == 0)
    446 				gpr = gpr + 1;
    447 
    448 			RA_DUMP(
    449 				sblog << "    trying " << gpr << "\n";
    450 			);
    451 
    452 			unsigned chan = gpr.chan();
    453 			if (chan_count[chan] < 3) {
    454 				++chan_count[chan];
    455 
    456 				if (v->chunk) {
    457 					vvec::iterator F = std::find(v->chunk->values.begin(),
    458 					                             v->chunk->values.end(),
    459 					                             v);
    460 					v->chunk->values.erase(F);
    461 					v->chunk = NULL;
    462 				}
    463 
    464 				assign_color(v, gpr);
    465 				break;
    466 			} else {
    467 				allowed_chans &= ~(1 << chan);
    468 			}
    469 			gpr = gpr + 1;
    470 		}
    471 
    472 		if (!gpr) {
    473 			sblog << "color_bs_constraint: failed...\n";
    474 			assert(!"coloring failed");
    475 		}
    476 	}
    477 }
    478 
    479 void ra_init::color(value* v) {
    480 
    481 	if (v->constraint && v->constraint->kind == CK_PACKED_BS) {
    482 		color_bs_constraint(v->constraint);
    483 		return;
    484 	}
    485 
    486 	if (v->chunk && v->chunk->is_fixed())
    487 		return;
    488 
    489 	RA_DUMP(
    490 		sblog << "coloring ";
    491 		dump::dump_val(v);
    492 		sblog << "   interferences ";
    493 		dump::dump_set(sh, v->interferences);
    494 		sblog << "\n";
    495 	);
    496 
    497 	if (v->is_reg_pinned()) {
    498 		assert(v->is_chan_pinned());
    499 		assign_color(v, v->pin_gpr);
    500 		return;
    501 	}
    502 
    503 	regbits rb(sh, v->interferences);
    504 	sel_chan c;
    505 
    506 	if (v->is_chan_pinned()) {
    507 		RA_DUMP( sblog << "chan_pinned = " << v->pin_gpr.chan() << "  ";	);
    508 		unsigned mask = 1 << v->pin_gpr.chan();
    509 		c = rb.find_free_chans(mask) + v->pin_gpr.chan();
    510 	} else {
    511 		unsigned cm = get_preferable_chan_mask();
    512 		RA_DUMP( sblog << "pref chan mask: " << cm << "\n"; );
    513 		c = rb.find_free_chan_by_mask(cm);
    514 	}
    515 
    516 	assert(c && c.sel() < 128 - ctx.alu_temp_gprs && "color failed");
    517 	assign_color(v, c);
    518 }
    519 
    520 void ra_init::assign_color(value* v, sel_chan c) {
    521 	add_prev_chan(c.chan());
    522 	v->gpr = c;
    523 	RA_DUMP(
    524 		sblog << "colored ";
    525 		dump::dump_val(v);
    526 		sblog << " to " << c << "\n";
    527 	);
    528 }
    529 
    530 // ===================================================
    531 
    532 int ra_split::run() {
    533 	split(sh.root);
    534 	return 0;
    535 }
    536 
    537 void ra_split::split_phi_src(container_node *loc, container_node *c,
    538                              unsigned id, bool loop) {
    539 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
    540 		node *p = *I;
    541 		value* &v = p->src[id], *d = p->dst[0];
    542 		assert(v);
    543 
    544 		if (!d->is_sgpr() || v->is_undef())
    545 			continue;
    546 
    547 		value *t = sh.create_temp_value();
    548 		if (loop && id == 0)
    549 			loc->insert_before(sh.create_copy_mov(t, v));
    550 		else
    551 			loc->push_back(sh.create_copy_mov(t, v));
    552 		v = t;
    553 
    554 		sh.coal.add_edge(v, d, coalescer::phi_cost);
    555 	}
    556 }
    557 
    558 void ra_split::split_phi_dst(node* loc, container_node *c, bool loop) {
    559 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
    560 		node *p = *I;
    561 		value* &v = p->dst[0];
    562 		assert(v);
    563 
    564 		if (!v->is_sgpr())
    565 			continue;
    566 
    567 		value *t = sh.create_temp_value();
    568 		node *cp = sh.create_copy_mov(v, t);
    569 		if (loop)
    570 			static_cast<container_node*>(loc)->push_front(cp);
    571 		else
    572 			loc->insert_after(cp);
    573 		v = t;
    574 	}
    575 }
    576 
    577 
    578 void ra_split::init_phi_constraints(container_node *c) {
    579 	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
    580 		node *p = *I;
    581 		ra_constraint *cc = sh.coal.create_constraint(CK_PHI);
    582 		cc->values.push_back(p->dst[0]);
    583 
    584 		for (vvec::iterator I = p->src.begin(), E = p->src.end(); I != E; ++I) {
    585 			value *v = *I;
    586 			if (v->is_sgpr())
    587 				cc->values.push_back(v);
    588 		}
    589 
    590 		cc->update_values();
    591 	}
    592 }
    593 
    594 void ra_split::split(container_node* n) {
    595 
    596 	if (n->type == NT_DEPART) {
    597 		depart_node *d = static_cast<depart_node*>(n);
    598 		if (d->target->phi)
    599 			split_phi_src(d, d->target->phi, d->dep_id, false);
    600 	} else if (n->type == NT_REPEAT) {
    601 		repeat_node *r = static_cast<repeat_node*>(n);
    602 		if (r->target->loop_phi)
    603 			split_phi_src(r, r->target->loop_phi, r->rep_id, true);
    604 	} else if (n->type == NT_REGION) {
    605 		region_node *r = static_cast<region_node*>(n);
    606 		if (r->phi) {
    607 			split_phi_dst(r, r->phi, false);
    608 		}
    609 		if (r->loop_phi) {
    610 			split_phi_dst(r->get_entry_code_location(), r->loop_phi,
    611 					true);
    612 			split_phi_src(r, r->loop_phi, 0, true);
    613 		}
    614 	}
    615 
    616 	for (node_riterator N, I = n->rbegin(), E = n->rend(); I != E; I = N) {
    617 		N = I;
    618 		++N;
    619 		node *o = *I;
    620 		if (o->type == NT_OP) {
    621 			split_op(o);
    622 		} else if (o->is_container()) {
    623 			split(static_cast<container_node*>(o));
    624 		}
    625 	}
    626 
    627 	if (n->type == NT_REGION) {
    628 		region_node *r = static_cast<region_node*>(n);
    629 		if (r->phi)
    630 			init_phi_constraints(r->phi);
    631 		if (r->loop_phi)
    632 			init_phi_constraints(r->loop_phi);
    633 	}
    634 }
    635 
    636 void ra_split::split_op(node* n) {
    637 	switch(n->subtype) {
    638 		case NST_ALU_PACKED_INST:
    639 			split_alu_packed(static_cast<alu_packed_node*>(n));
    640 			break;
    641 		case NST_FETCH_INST:
    642 		case NST_CF_INST:
    643 			split_vector_inst(n);
    644 		default:
    645 			break;
    646 	}
    647 }
    648 
    649 void ra_split::split_packed_ins(alu_packed_node *n) {
    650 	vvec vv = n->src;
    651 	vvec sv, dv;
    652 
    653 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
    654 
    655 		value *&v = *I;
    656 
    657 		if (v && v->is_any_gpr() && !v->is_undef()) {
    658 
    659 			vvec::iterator F = std::find(sv.begin(), sv.end(), v);
    660 			value *t;
    661 
    662 			if (F != sv.end()) {
    663 				t = *(dv.begin() + (F - sv.begin()));
    664 			} else {
    665 				t = sh.create_temp_value();
    666 				sv.push_back(v);
    667 				dv.push_back(t);
    668 			}
    669 			v = t;
    670 		}
    671 	}
    672 
    673 	unsigned cnt = sv.size();
    674 
    675 	if (cnt > 0) {
    676 		n->src = vv;
    677 		for (vvec::iterator SI = sv.begin(), DI = dv.begin(), SE = sv.end();
    678 				SI != SE; ++SI, ++DI) {
    679 			n->insert_before(sh.create_copy_mov(*DI, *SI));
    680 		}
    681 
    682 		ra_constraint *c = sh.coal.create_constraint(CK_PACKED_BS);
    683 		c->values = dv;
    684 		c->update_values();
    685 	}
    686 }
    687 
    688 // TODO handle other packed ops for cayman
    689 void ra_split::split_alu_packed(alu_packed_node* n) {
    690 	switch (n->op()) {
    691 		case ALU_OP2_DOT4:
    692 		case ALU_OP2_CUBE:
    693 			split_packed_ins(n);
    694 			break;
    695 		default:
    696 			break;
    697 	}
    698 }
    699 
    700 void ra_split::split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz) {
    701 	unsigned ch = 0;
    702 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I, ++ch) {
    703 
    704 		value* &o = *I;
    705 
    706 		if (o) {
    707 
    708 			assert(!o->is_dead());
    709 
    710 			if (o->is_undef() || o->is_geometry_emit())
    711 				continue;
    712 
    713 			if (allow_swz && o->is_float_0_or_1())
    714 				continue;
    715 
    716 			value *t;
    717 			vvec::iterator F =
    718 					allow_swz ? std::find(v2.begin(), v2.end(), o) : v2.end();
    719 
    720 			if (F != v2.end()) {
    721 				t = *(v1.begin() + (F - v2.begin()));
    722 			} else {
    723 				t = sh.create_temp_value();
    724 
    725 				if (!allow_swz) {
    726 					t->flags |= VLF_PIN_CHAN;
    727 					t->pin_gpr = sel_chan(0, ch);
    728 				}
    729 
    730 				v2.push_back(o);
    731 				v1.push_back(t);
    732 			}
    733 			o = t;
    734 		}
    735 	}
    736 }
    737 
    738 void ra_split::split_vector_inst(node* n) {
    739 	ra_constraint *c;
    740 
    741 	bool call_fs = n->is_cf_op(CF_OP_CALL_FS);
    742 	bool no_src_swizzle = n->is_cf_inst() && (n->cf_op_flags() & CF_MEM);
    743 
    744 	no_src_swizzle |= n->is_fetch_op(FETCH_OP_VFETCH) ||
    745 			n->is_fetch_op(FETCH_OP_SEMFETCH);
    746 
    747 	if (!n->src.empty() && !call_fs) {
    748 
    749 		// we may have more than one source vector -
    750 		// fetch instructions with FF_USEGRAD have gradient values in
    751 		// src vectors 1 (src[4-7] and 2 (src[8-11])
    752 
    753 		unsigned nvec = n->src.size() >> 2;
    754 		assert(nvec << 2 <= n->src.size());
    755 
    756 		for (unsigned nv = 0; nv < nvec; ++nv) {
    757 			vvec sv, tv, nsrc(4);
    758 			unsigned arg_start = nv << 2;
    759 
    760 			std::copy(n->src.begin() + arg_start,
    761 			          n->src.begin() + arg_start + 4,
    762 			          nsrc.begin());
    763 
    764 			split_vec(nsrc, tv, sv, !no_src_swizzle);
    765 
    766 			unsigned cnt = sv.size();
    767 
    768 			if (no_src_swizzle || cnt) {
    769 
    770 				std::copy(nsrc.begin(), nsrc.end(), n->src.begin() + arg_start);
    771 
    772 				for(unsigned i = 0, s = tv.size(); i < s; ++i) {
    773 					n->insert_before(sh.create_copy_mov(tv[i], sv[i]));
    774 				}
    775 
    776 				c = sh.coal.create_constraint(CK_SAME_REG);
    777 				c->values = tv;
    778 				c->update_values();
    779 			}
    780 		}
    781 	}
    782 
    783 	if (!n->dst.empty()) {
    784 		vvec sv, tv, ndst = n->dst;
    785 
    786 		split_vec(ndst, tv, sv, true);
    787 
    788 		if (sv.size()) {
    789 			n->dst = ndst;
    790 
    791 			node *lp = n;
    792 			for(unsigned i = 0, s = tv.size(); i < s; ++i) {
    793 				lp->insert_after(sh.create_copy_mov(sv[i], tv[i]));
    794 				lp = lp->next;
    795 			}
    796 
    797 			if (call_fs) {
    798 				for (unsigned i = 0, cnt = tv.size(); i < cnt; ++i) {
    799 					value *v = tv[i];
    800 					value *s = sv[i];
    801 					if (!v)
    802 						continue;
    803 
    804 					v->flags |= VLF_PIN_REG | VLF_PIN_CHAN;
    805 					s->flags &= ~(VLF_PIN_REG | VLF_PIN_CHAN);
    806 					sel_chan sel;
    807 
    808 					if (s->is_rel()) {
    809 						assert(s->rel->is_const());
    810 						sel = sel_chan(s->select.sel() +
    811 										 s->rel->get_const_value().u,
    812 						             s->select.chan());
    813 					} else
    814 						sel = s->select;
    815 
    816 					v->gpr = v->pin_gpr = sel;
    817 					v->fix();
    818 				}
    819 			} else {
    820 				c = sh.coal.create_constraint(CK_SAME_REG);
    821 				c->values = tv;
    822 				c->update_values();
    823 			}
    824 		}
    825 	}
    826 }
    827 
    828 void ra_init::add_prev_chan(unsigned chan) {
    829 	prev_chans = (prev_chans << 4) | (1 << chan);
    830 }
    831 
    832 unsigned ra_init::get_preferable_chan_mask() {
    833 	unsigned i, used_chans = 0;
    834 	unsigned chans = prev_chans;
    835 
    836 	for (i = 0; i < ra_tune; ++i) {
    837 		used_chans |= chans;
    838 		chans >>= 4;
    839 	}
    840 
    841 	return (~used_chans) & 0xF;
    842 }
    843 
    844 } // namespace r600_sb
    845