Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #define PSC_DEBUG 0
     28 
     29 #if PSC_DEBUG
     30 #define PSC_DUMP(a) do { a } while (0)
     31 #else
     32 #define PSC_DUMP(a)
     33 #endif
     34 
     35 #include "sb_bc.h"
     36 #include "sb_shader.h"
     37 #include "sb_pass.h"
     38 #include "sb_sched.h"
     39 #include "eg_sq.h" // V_SQ_CF_INDEX_NONE/0/1
     40 
     41 namespace r600_sb {
     42 
// Tracks kcache (constant cache) line reservations for one ALU group:
// rp[] holds the reserved line selectors (0 = free slot), uc[] the
// per-line use counts, sel_count the number of usable line slots.
rp_kcache_tracker::rp_kcache_tracker(shader &sh) : rp(), uc(),
		// FIXME: for now we'll use "two const pairs" limit for r600, same as
		// for other chips, otherwise additional check in alu_group_tracker is
		// required to make sure that all 4 consts in the group fit into 2
		// kcache sets
		sel_count(2) {}
     49 
     50 bool rp_kcache_tracker::try_reserve(sel_chan r) {
     51 	unsigned sel = kc_sel(r);
     52 
     53 	for (unsigned i = 0; i < sel_count; ++i) {
     54 		if (rp[i] == 0) {
     55 			rp[i] = sel;
     56 			++uc[i];
     57 			return true;
     58 		}
     59 		if (rp[i] == sel) {
     60 			++uc[i];
     61 			return true;
     62 		}
     63 	}
     64 	return false;
     65 }
     66 
     67 bool rp_kcache_tracker::try_reserve(node* n) {
     68 	bool need_unreserve = false;
     69 	vvec::iterator I(n->src.begin()), E(n->src.end());
     70 
     71 	for (; I != E; ++I) {
     72 		value *v = *I;
     73 		if (v->is_kcache()) {
     74 			if (!try_reserve(v->select))
     75 				break;
     76 			else
     77 				need_unreserve = true;
     78 		}
     79 	}
     80 	if (I == E)
     81 		return true;
     82 
     83 	if (need_unreserve && I != n->src.begin()) {
     84 		do {
     85 			--I;
     86 			value *v =*I;
     87 			if (v->is_kcache())
     88 				unreserve(v->select);
     89 		} while (I != n->src.begin());
     90 	}
     91 	return false;
     92 }
     93 
     94 inline
     95 void rp_kcache_tracker::unreserve(node* n) {
     96 	vvec::iterator I(n->src.begin()), E(n->src.end());
     97 	for (; I != E; ++I) {
     98 		value *v = *I;
     99 		if (v->is_kcache())
    100 			unreserve(v->select);
    101 	}
    102 }
    103 
    104 void rp_kcache_tracker::unreserve(sel_chan r) {
    105 	unsigned sel = kc_sel(r);
    106 
    107 	for (unsigned i = 0; i < sel_count; ++i)
    108 		if (rp[i] == sel) {
    109 			if (--uc[i] == 0)
    110 				rp[i] = 0;
    111 			return;
    112 		}
    113 	assert(0);
    114 	return;
    115 }
    116 
    117 bool literal_tracker::try_reserve(alu_node* n) {
    118 	bool need_unreserve = false;
    119 
    120 	vvec::iterator I(n->src.begin()), E(n->src.end());
    121 
    122 	for (; I != E; ++I) {
    123 		value *v = *I;
    124 		if (v->is_literal()) {
    125 			if (!try_reserve(v->literal_value))
    126 				break;
    127 			else
    128 				need_unreserve = true;
    129 		}
    130 	}
    131 	if (I == E)
    132 		return true;
    133 
    134 	if (need_unreserve && I != n->src.begin()) {
    135 		do {
    136 			--I;
    137 			value *v =*I;
    138 			if (v->is_literal())
    139 				unreserve(v->literal_value);
    140 		} while (I != n->src.begin());
    141 	}
    142 	return false;
    143 }
    144 
// Release the literal slots used by the sources of n.
// NOTE(review): this walks only the first src_count sources while
// try_reserve(alu_node*) walks the whole src vector - presumably both
// cover the same literal entries for alu nodes; confirm before
// changing either side.
void literal_tracker::unreserve(alu_node* n) {
	unsigned nsrc = n->bc.op_ptr->src_count, i;

	for (i = 0; i < nsrc; ++i) {
		value *v = n->src[i];
		if (v->is_literal())
			unreserve(v->literal_value);
	}
}
    154 
    155 bool literal_tracker::try_reserve(literal l) {
    156 
    157 	PSC_DUMP( sblog << "literal reserve " << l.u << "  " << l.f << "\n"; );
    158 
    159 	for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
    160 		if (lt[i] == 0) {
    161 			lt[i] = l;
    162 			++uc[i];
    163 			PSC_DUMP( sblog << "  reserved new uc = " << uc[i] << "\n"; );
    164 			return true;
    165 		} else if (lt[i] == l) {
    166 			++uc[i];
    167 			PSC_DUMP( sblog << "  reserved uc = " << uc[i] << "\n"; );
    168 			return true;
    169 		}
    170 	}
    171 	PSC_DUMP( sblog << "  failed to reserve literal\n"; );
    172 	return false;
    173 }
    174 
    175 void literal_tracker::unreserve(literal l) {
    176 
    177 	PSC_DUMP( sblog << "literal unreserve " << l.u << "  " << l.f << "\n"; );
    178 
    179 	for (unsigned i = 0; i < MAX_ALU_LITERALS; ++i) {
    180 		if (lt[i] == l) {
    181 			if (--uc[i] == 0)
    182 				lt[i] = 0;
    183 			return;
    184 		}
    185 	}
    186 	assert(0);
    187 	return;
    188 }
    189 
    190 static inline unsigned bs_cycle_vector(unsigned bs, unsigned src) {
    191 	static const unsigned swz[VEC_NUM][3] = {
    192 		{0, 1, 2}, {0, 2, 1}, {1, 2, 0}, {1, 0, 2}, {2, 0, 1}, {2, 1, 0}
    193 	};
    194 	assert(bs < VEC_NUM && src < 3);
    195 	return swz[bs][src];
    196 }
    197 
    198 static inline unsigned bs_cycle_scalar(unsigned bs, unsigned src) {
    199 	static const unsigned swz[SCL_NUM][3] = {
    200 		{2, 1, 0}, {1, 2, 2}, {2, 1, 2}, {2, 2, 1}
    201 	};
    202 
    203 	if (bs >= SCL_NUM || src >= 3) {
    204 		// this prevents gcc warning "array subscript is above array bounds"
    205 		// AFAICS we should never hit this path
    206 		abort();
    207 	}
    208 	return swz[bs][src];
    209 }
    210 
    211 static inline unsigned bs_cycle(bool trans, unsigned bs, unsigned src) {
    212 	return trans ? bs_cycle_scalar(bs, src) : bs_cycle_vector(bs, src);
    213 }
    214 
    215 inline
    216 bool rp_gpr_tracker::try_reserve(unsigned cycle, unsigned sel, unsigned chan) {
    217 	++sel;
    218 	if (rp[cycle][chan] == 0) {
    219 		rp[cycle][chan] = sel;
    220 		++uc[cycle][chan];
    221 		return true;
    222 	} else if (rp[cycle][chan] == sel) {
    223 		++uc[cycle][chan];
    224 		return true;
    225 	}
    226 	return false;
    227 }
    228 
// Release the read-port reservations made by try_reserve(alu_node*)
// for every GPR source of n, using the node's current bank swizzle to
// recompute the read cycles.
inline
void rp_gpr_tracker::unreserve(alu_node* n) {
	unsigned nsrc = n->bc.op_ptr->src_count, i;
	unsigned trans = n->bc.slot == SLOT_TRANS;
	unsigned bs = n->bc.bank_swizzle;
	// 'opt': src0 and src1 read the same register/channel, so only one
	// read port was reserved for the pair (vector slots only)
	unsigned opt = !trans
			&& n->bc.src[0].sel == n->bc.src[1].sel
			&& n->bc.src[0].chan == n->bc.src[1].chan;

	for (i = 0; i < nsrc; ++i) {
		value *v = n->src[i];
		// constants and undef values don't consume GPR read ports
		if (v->is_readonly() || v->is_undef())
			continue;
		if (i == 1 && opt)
			continue;
		unsigned cycle = bs_cycle(trans, bs, i);
		unreserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan);
	}
}
    248 
    249 inline
    250 void rp_gpr_tracker::unreserve(unsigned cycle, unsigned sel, unsigned chan) {
    251 	++sel;
    252 	assert(rp[cycle][chan] == sel && uc[cycle][chan]);
    253 	if (--uc[cycle][chan] == 0)
    254 		rp[cycle][chan] = 0;
    255 }
    256 
// Try to reserve read ports for all GPR sources of n under its current
// bank swizzle, while checking the trans-slot constant limits (at most
// two constant reads, and constant reads implicitly occupy the leading
// cycles).  Rolls back all reservations on failure.
inline
bool rp_gpr_tracker::try_reserve(alu_node* n) {
	unsigned nsrc = n->bc.op_ptr->src_count, i;
	unsigned trans = n->bc.slot == SLOT_TRANS;
	unsigned bs = n->bc.bank_swizzle;
	// identical src0/src1 need only one read port (vector slots only)
	unsigned opt = !trans && nsrc >= 2 &&
			n->src[0] == n->src[1];

	bool need_unreserve = false;
	unsigned const_count = 0, min_gpr_cycle = 3;

	for (i = 0; i < nsrc; ++i) {
		value *v = n->src[i];
		if (v->is_readonly() || v->is_undef()) {
			const_count++;
			// trans slot can read at most two constants
			if (trans && const_count == 3)
				break;
		} else {
			if (i == 1 && opt)
				continue;

			unsigned cycle = bs_cycle(trans, bs, i);

			if (trans && cycle < min_gpr_cycle)
				min_gpr_cycle = cycle;

			// in the trans slot constants occupy the first const_count
			// cycles, so a GPR read in one of those cycles conflicts
			if (const_count && cycle < const_count && trans)
				break;

			if (!try_reserve(cycle, n->bc.src[i].sel, n->bc.src[i].chan))
				break;
			else
				need_unreserve = true;
		}
	}

	// success only if every source was processed and the earliest GPR
	// read cycle doesn't overlap the cycles needed by the constants
	if ((i == nsrc) && (min_gpr_cycle + 1 > const_count))
		return true;

	// rollback: release reservations for sources [0, i-1]; the
	// 'continue' below jumps to the do/while condition, so i is
	// decremented on every path
	if (need_unreserve && i--) {
		do {
			value *v = n->src[i];
			if (!v->is_readonly() && !v->is_undef()) {
			if (i == 1 && opt)
				continue;
			unreserve(bs_cycle(trans, bs, i), n->bc.src[i].sel,
			          n->bc.src[i].chan);
			}
		} while (i--);
	}
	return false;
}
    309 
// Tracks the ALU group currently being assembled: slot occupancy,
// kcache/literal/read-port reservations and group-wide flags.
alu_group_tracker::alu_group_tracker(shader &sh)
	: sh(sh), kc(sh),
	  gpr(), lt(), slots(),
	  // cayman has 4 vector slots and no trans slot
	  max_slots(sh.get_ctx().is_cayman() ? 4 : 5),
	  has_mova(), uses_ar(), has_predset(), has_kill(),
	  updates_exec_mask(), chan_count(), interp_param(), next_id() {

	available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
}
    319 
    320 inline
    321 sel_chan alu_group_tracker::get_value_id(value* v) {
    322 	unsigned &id = vmap[v];
    323 	if (!id)
    324 		id = ++next_id;
    325 	return sel_chan(id, v->get_final_chan());
    326 }
    327 
    328 inline
    329 void alu_group_tracker::assign_slot(unsigned slot, alu_node* n) {
    330 	update_flags(n);
    331 	slots[slot] = n;
    332 	available_slots &= ~(1 << slot);
    333 
    334 	unsigned param = n->interp_param();
    335 
    336 	if (param) {
    337 		assert(!interp_param || interp_param == param);
    338 		interp_param = param;
    339 	}
    340 }
    341 
    342 
    343 void alu_group_tracker::discard_all_slots(container_node &removed_nodes) {
    344 	PSC_DUMP( sblog << "agt::discard_all_slots\n"; );
    345 	discard_slots(~available_slots & ((1 << max_slots) - 1), removed_nodes);
    346 }
    347 
// Remove the ops occupying the slots in slot_mask from the group and
// append them to removed_nodes.  Packed (multi-slot) ops are discarded
// as a whole when any of their slots is selected.  Afterwards a
// vector-capable op left in the trans slot is moved to its natural
// vector slot if that one is free, and the tracker state is rebuilt.
void alu_group_tracker::discard_slots(unsigned slot_mask,
                                    container_node &removed_nodes) {

	PSC_DUMP(
		sblog << "discard_slots : packed_ops : "
			<< (unsigned)packed_ops.size() << "\n";
	);

	// first pass: discard packed ops intersecting slot_mask
	// (N is precomputed because erase(I) invalidates I)
	for (node_vec::iterator N, I = packed_ops.begin();
			I != packed_ops.end(); I = N) {
		N = I; ++N;

		alu_packed_node *n = static_cast<alu_packed_node*>(*I);
		unsigned pslots = n->get_slot_mask();

		PSC_DUMP(
			sblog << "discard_slots : packed slot_mask : " << pslots << "\n";
		);

		if (pslots & slot_mask) {

			PSC_DUMP(
				sblog << "discard_slots : discarding packed...\n";
			);

			removed_nodes.push_back(n);
			slot_mask &= ~pslots;
			N = packed_ops.erase(I);
			available_slots |= pslots;
			for (unsigned k = 0; k < max_slots; ++k) {
				if (pslots & (1 << k))
					slots[k] = NULL;
			}
		}
	}

	// second pass: discard the remaining (single-slot) selected ops
	for (unsigned slot = 0; slot < max_slots; ++slot) {
		unsigned slot_bit = 1 << slot;

		if (slot_mask & slot_bit) {
			assert(!(available_slots & slot_bit));
			assert(slots[slot]);

			// packed (4-slot) ops were all handled in the first pass
			assert(!(slots[slot]->bc.slot_flags & AF_4SLOT));

			PSC_DUMP(
				sblog << "discarding slot " << slot << " : ";
				dump::dump_op(slots[slot]);
				sblog << "\n";
			);

			removed_nodes.push_back(slots[slot]);
			slots[slot] = NULL;
			available_slots |= slot_bit;
		}
	}

	// if a vector-capable op sits in the trans slot and its destination
	// channel's vector slot is now free, move it there
	alu_node *t = slots[4];
	if (t && (t->bc.slot_flags & AF_V)) {
		unsigned chan = t->bc.dst_chan;
		if (!slots[chan]) {
			PSC_DUMP(
				sblog << "moving ";
				dump::dump_op(t);
				sblog << " from trans slot to free slot " << chan << "\n";
			);

			slots[chan] = t;
			slots[4] = NULL;
			t->bc.slot = chan;
		}
	}

	// rebuild kcache/literal/read-port state from the surviving slots
	reinit();
}
    423 
    424 alu_group_node* alu_group_tracker::emit() {
    425 
    426 	alu_group_node *g = sh.create_alu_group();
    427 
    428 	lt.init_group_literals(g);
    429 
    430 	for (unsigned i = 0; i < max_slots; ++i) {
    431 		alu_node *n = slots[i];
    432 		if (n) {
    433 			g->push_back(n);
    434 		}
    435 	}
    436 	return g;
    437 }
    438 
// Try to place n into its slot in the current group.  Checks group-wide
// constraints (interp param, kill/pred/MOVA/AR interactions, GPR
// channel pressure), reserves literals and kcache lines, then searches
// for a bank-swizzle assignment that satisfies the GPR read-port
// limits - first for n alone, then, if needed, by re-searching swizzles
// for all non-forced slots with backtracking.  On failure all state is
// restored and false is returned.
bool alu_group_tracker::try_reserve(alu_node* n) {
	unsigned nsrc = n->bc.op_ptr->src_count;
	unsigned slot = n->bc.slot;
	bool trans = slot == 4;

	if (slots[slot])
		return false;

	unsigned flags = n->bc.op_ptr->flags;

	unsigned param = n->interp_param();

	// all interp ops in one group must use the same param
	if (param && interp_param && interp_param != param)
		return false;

	if ((flags & AF_KILL) && has_predset)
		return false;
	if ((flags & AF_ANY_PRED) && (has_kill || has_predset))
		return false;
	if ((flags & AF_MOVA) && (has_mova || uses_ar))
		return false;

	if (n->uses_ar() && has_mova)
		return false;

	for (unsigned i = 0; i < nsrc; ++i) {

		unsigned last_id = next_id;

		value *v = n->src[i];
		if (!v->is_any_gpr() && !v->is_rel())
			continue;
		sel_chan vid = get_value_id(n->src[i]);

		// a NEW value (id just allocated) on a channel that already has
		// 3 distinct values would exceed the per-channel read limit
		if (vid > last_id && chan_count[vid.chan()] == 3) {
			return false;
		}

		n->bc.src[i].sel = vid.sel();
		n->bc.src[i].chan = vid.chan();
	}

	if (!lt.try_reserve(n))
		return false;

	if (!kc.try_reserve(n)) {
		lt.unreserve(n);
		return false;
	}

	unsigned fbs = n->forced_bank_swizzle();

	n->bc.bank_swizzle = 0;

	if (!trans && fbs)
		n->bc.bank_swizzle = VEC_210;

	// fast path: current swizzle works as-is
	if (gpr.try_reserve(n)) {
		assign_slot(slot, n);
		return true;
	}

	// try all swizzles for n alone (unless its swizzle is forced)
	if (!fbs) {
		unsigned swz_num = trans ? SCL_NUM : VEC_NUM;
		for (unsigned bs = 0; bs < swz_num; ++bs) {
			n->bc.bank_swizzle = bs;
			if (gpr.try_reserve(n)) {
				assign_slot(slot, n);
				return true;
			}
		}
	}

	// full search: re-derive bank swizzles for the whole group.
	// Save the current swizzles, reserve the forced-swizzle slots first,
	// and collect the range of slots whose swizzle is free to vary.
	gpr.reset();

	slots[slot] = n;
	unsigned forced_swz_slots = 0;
	int first_slot = ~0, first_nf = ~0, last_slot = ~0;
	unsigned save_bs[5];

	for (unsigned i = 0; i < max_slots; ++i) {
		alu_node *a = slots[i];
		if (a) {
			if (first_slot == ~0)
				first_slot = i;
			last_slot = i;
			save_bs[i] = a->bc.bank_swizzle;
			if (a->forced_bank_swizzle()) {
				assert(i != SLOT_TRANS);
				forced_swz_slots |= (1 << i);
				a->bc.bank_swizzle = VEC_210;
				if (!gpr.try_reserve(a))
					assert(!"internal reservation error");
			} else {
				if (first_nf == ~0)
					first_nf = i;

				a->bc.bank_swizzle = 0;
			}
		}
	}

	// only forced-swizzle slots present and they all fit
	if (first_nf == ~0) {
		assign_slot(slot, n);
		return true;
	}

	assert(first_slot != ~0 && last_slot != ~0);

	// silence "array subscript is above array bounds" with gcc 4.8
	if (last_slot >= 5)
		abort();

	// depth-first search over the swizzles of the non-forced slots
	// [first_nf, last_slot]: advance on success, otherwise increment the
	// current slot's swizzle, and backtrack when it wraps around
	int i = first_nf;
	alu_node *a = slots[i];
	bool backtrack = false;

	while (1) {

		PSC_DUMP(
			sblog << " bs: trying s" << i << " bs:" << a->bc.bank_swizzle
				<< " bt:" << backtrack << "\n";
		);

		if (!backtrack && gpr.try_reserve(a)) {
			PSC_DUMP(
				sblog << " bs: reserved s" << i << " bs:" << a->bc.bank_swizzle
					<< "\n";
			);

			// advance to the next occupied slot
			while ((++i <= last_slot) && !slots[i]);
			if (i <= last_slot)
				a = slots[i];
			else
				break;
		} else {
			bool itrans = i == SLOT_TRANS;
			unsigned max_swz = itrans ? SCL_221 : VEC_210;

			if (a->bc.bank_swizzle < max_swz) {
				++a->bc.bank_swizzle;

				PSC_DUMP(
					sblog << " bs: inc s" << i << " bs:" << a->bc.bank_swizzle
						<< "\n";
				);

			} else {

				// exhausted this slot's swizzles - backtrack to the
				// previous occupied non-forced slot
				a->bc.bank_swizzle = 0;
				while ((--i >= first_nf) && !slots[i]);
				if (i < first_nf)
					break;
				a = slots[i];
				PSC_DUMP(
					sblog << " bs: unreserve s" << i << " bs:" << a->bc.bank_swizzle
						<< "\n";
				);
				gpr.unreserve(a);
				backtrack = true;

				continue;
			}
		}
		backtrack = false;
	}

	// search succeeded iff we walked past the last occupied slot
	if (i == last_slot + 1) {
		assign_slot(slot, n);
		return true;
	}

	// reservation failed, restore previous state
	slots[slot] = NULL;
	gpr.reset();
	for (unsigned i = 0; i < max_slots; ++i) {
		alu_node *a = slots[i];
		if (a) {
			a->bc.bank_swizzle = save_bs[i];
			bool b = gpr.try_reserve(a);
			assert(b);
		}
	}

	kc.unreserve(n);
	lt.unreserve(n);
	return false;
}
    627 
// Try to reserve all slots needed by packed op p.  On partial failure
// the already placed sub-ops are cleared from their slots and the
// tracker state is rebuilt via reinit().
bool alu_group_tracker::try_reserve(alu_packed_node* p) {
	bool need_unreserve = false;
	node_iterator I(p->begin()), E(p->end());

	for (; I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		if (!try_reserve(n))
			break;
		else
			need_unreserve = true;
	}

	if (I == E)  {
		packed_ops.push_back(p);
		return true;
	}

	if (need_unreserve) {
		// NOTE(review): this relies on decrementing past p->begin()
		// yielding the end/sentinel iterator so the loop terminates -
		// confirm against node_iterator semantics before restructuring
		while (--I != E) {
			alu_node *n = static_cast<alu_node*>(*I);
			slots[n->bc.slot] = NULL;
		}
		reinit();
	}
	return false;
}
    654 
    655 void alu_group_tracker::reinit() {
    656 	alu_node * s[5];
    657 	memcpy(s, slots, sizeof(slots));
    658 
    659 	reset(true);
    660 
    661 	for (int i = max_slots - 1; i >= 0; --i) {
    662 		if (s[i] && !try_reserve(s[i])) {
    663 			sblog << "alu_group_tracker: reinit error on slot " << i <<  "\n";
    664 			for (unsigned i = 0; i < max_slots; ++i) {
    665 				sblog << "  slot " << i << " : ";
    666 				if (s[i])
    667 					dump::dump_op(s[i]);
    668 
    669 				sblog << "\n";
    670 			}
    671 			assert(!"alu_group_tracker: reinit error");
    672 		}
    673 	}
    674 }
    675 
    676 void alu_group_tracker::reset(bool keep_packed) {
    677 	kc.reset();
    678 	gpr.reset();
    679 	lt.reset();
    680 	memset(slots, 0, sizeof(slots));
    681 	vmap.clear();
    682 	next_id = 0;
    683 	has_mova = false;
    684 	uses_ar = false;
    685 	has_predset = false;
    686 	has_kill = false;
    687 	updates_exec_mask = false;
    688 	available_slots = sh.get_ctx().has_trans ? 0x1F : 0x0F;
    689 	interp_param = 0;
    690 
    691 	chan_count[0] = 0;
    692 	chan_count[1] = 0;
    693 	chan_count[2] = 0;
    694 	chan_count[3] = 0;
    695 
    696 	if (!keep_packed)
    697 		packed_ops.clear();
    698 }
    699 
    700 void alu_group_tracker::update_flags(alu_node* n) {
    701 	unsigned flags = n->bc.op_ptr->flags;
    702 	has_kill |= (flags & AF_KILL);
    703 	has_mova |= (flags & AF_MOVA);
    704 	has_predset |= (flags & AF_ANY_PRED);
    705 	uses_ar |= n->uses_ar();
    706 
    707 	if (flags & AF_ANY_PRED) {
    708 		if (n->dst[2] != NULL)
    709 			updates_exec_mask = true;
    710 	}
    711 }
    712 
    713 int post_scheduler::run() {
    714 	run_on(sh.root);
    715 	return 0;
    716 }
    717 
    718 void post_scheduler::run_on(container_node* n) {
    719 
    720 	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
    721 		if (I->is_container()) {
    722 			if (I->subtype == NST_BB) {
    723 				bb_node* bb = static_cast<bb_node*>(*I);
    724 				schedule_bb(bb);
    725 			} else {
    726 				run_on(static_cast<container_node*>(*I));
    727 			}
    728 		}
    729 	}
    730 }
    731 
    732 void post_scheduler::init_uc_val(container_node *c, value *v) {
    733 	node *d = v->any_def();
    734 	if (d && d->parent == c)
    735 		++ucm[d];
    736 }
    737 
// Accumulate use counts (ucm) for the defs of the values in vv.  For
// relative-addressed values the address value and the maybe-used set
// (muse) are counted as sources as well.  'src' distinguishes source
// vectors from dst vectors (only rel components of dsts are counted).
void post_scheduler::init_uc_vec(container_node *c, vvec &vv, bool src) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			init_uc_val(c, v->rel);
			init_uc_vec(c, v->muse, true);
		} if (src) {
			// NOTE(review): no 'else' above, so a rel source is counted
			// both through rel/muse and as a direct use - looks
			// intentional but confirm before reformatting
			init_uc_val(c, v);
		}
	}
}
    752 
    753 unsigned post_scheduler::init_ucm(container_node *c, node *n) {
    754 	init_uc_vec(c, n->src, true);
    755 	init_uc_vec(c, n->dst, false);
    756 
    757 	uc_map::iterator F = ucm.find(n);
    758 	return F == ucm.end() ? 0 : F->second;
    759 }
    760 
    761 void post_scheduler::schedule_bb(bb_node* bb) {
    762 	PSC_DUMP(
    763 		sblog << "scheduling BB " << bb->id << "\n";
    764 		if (!pending.empty())
    765 			dump::dump_op_list(&pending);
    766 	);
    767 
    768 	assert(pending.empty());
    769 	assert(bb_pending.empty());
    770 	assert(ready.empty());
    771 
    772 	bb_pending.append_from(bb);
    773 	cur_bb = bb;
    774 
    775 	node *n;
    776 
    777 	while ((n = bb_pending.back())) {
    778 
    779 		PSC_DUMP(
    780 			sblog << "post_sched_bb ";
    781 			dump::dump_op(n);
    782 			sblog << "\n";
    783 		);
    784 
    785 		// May require emitting ALU ops to load index registers
    786 		if (n->is_fetch_clause()) {
    787 			n->remove();
    788 			process_fetch(static_cast<container_node *>(n));
    789 			continue;
    790 		}
    791 
    792 		if (n->is_alu_clause()) {
    793 			n->remove();
    794 			process_alu(static_cast<container_node*>(n));
    795 			continue;
    796 		}
    797 
    798 		n->remove();
    799 		bb->push_front(n);
    800 	}
    801 
    802 	this->cur_bb = NULL;
    803 }
    804 
    805 void post_scheduler::init_regmap() {
    806 
    807 	regmap.clear();
    808 
    809 	PSC_DUMP(
    810 		sblog << "init_regmap: live: ";
    811 		dump::dump_set(sh, live);
    812 		sblog << "\n";
    813 	);
    814 
    815 	for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
    816 		value *v = *I;
    817 		assert(v);
    818 		if (!v->is_sgpr() || !v->is_prealloc())
    819 			continue;
    820 
    821 		sel_chan r = v->gpr;
    822 
    823 		PSC_DUMP(
    824 			sblog << "init_regmap:  " << r << " <= ";
    825 			dump::dump_val(v);
    826 			sblog << "\n";
    827 		);
    828 
    829 		assert(r);
    830 		regmap[r] = v;
    831 	}
    832 }
    833 
    834 static alu_node *create_set_idx(shader &sh, unsigned ar_idx) {
    835 	alu_node *a = sh.create_alu();
    836 
    837 	assert(ar_idx == V_SQ_CF_INDEX_0 || ar_idx == V_SQ_CF_INDEX_1);
    838 	if (ar_idx == V_SQ_CF_INDEX_0)
    839 		a->bc.set_op(ALU_OP0_SET_CF_IDX0);
    840 	else
    841 		a->bc.set_op(ALU_OP0_SET_CF_IDX1);
    842 	a->bc.slot = SLOT_X;
    843 	a->dst.resize(1); // Dummy needed for recolor
    844 
    845 	PSC_DUMP(
    846 		sblog << "created IDX load: ";
    847 		dump::dump_op(a);
    848 		sblog << "\n";
    849 	);
    850 
    851 	return a;
    852 }
    853 
// Emit the ALU clause that loads index register ar_idx (CF_IDX0/1)
// from value v.  On evergreen this takes two groups - first
// SET_CF_IDX0/1 is queued (it consumes AR), then the AR load - while
// cayman loads the index register directly.
void post_scheduler::load_index_register(value *v, unsigned ar_idx)
{
	alu.reset();

	if (!sh.get_ctx().is_cayman()) {
		// Evergreen has to first load address register, then use CF_SET_IDX0/1
		alu_group_tracker &rt = alu.grp();
		alu_node *set_idx = create_set_idx(sh, ar_idx);
		if (!rt.try_reserve(set_idx)) {
			sblog << "can't emit SET_CF_IDX";
			dump::dump_op(set_idx);
			sblog << "\n";
		}
		process_group();

		if (!alu.check_clause_limits()) {
			// Can't happen since clause only contains MOVA/CF_SET_IDX0/1
		}
		alu.emit_group();
	}

	// load AR (evergreen) or the index register itself (cayman)
	alu_group_tracker &rt = alu.grp();
	alu_node *a = alu.create_ar_load(v, ar_idx == V_SQ_CF_INDEX_1 ? SEL_Z : SEL_Y);

	if (!rt.try_reserve(a)) {
		sblog << "can't emit AR load : ";
		dump::dump_op(a);
		sblog << "\n";
	}

	process_group();

	if (!alu.check_clause_limits()) {
		// Can't happen since clause only contains MOVA/CF_SET_IDX0/1
	}

	alu.emit_group();
	alu.emit_clause(cur_bb);
}
    893 
// Schedule a fetch clause.  A fetch with an indexed sampler/resource
// needs its index register loaded first, which requires emitting an
// ALU clause in front of the fetch clause.
void post_scheduler::process_fetch(container_node *c) {
	if (c->empty())
		return;

	for (node_iterator N, I = c->begin(), E = c->end(); I != E; I = N) {
		N = I;
		++N;

		node *n = *I;

		fetch_node *f = static_cast<fetch_node*>(n);

		PSC_DUMP(
			sblog << "process_tex ";
			dump::dump_op(n);
			sblog << "  ";
		);

		// TODO: If same values used can avoid reloading index register
		if (f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ||
			f->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
			unsigned index_mode = f->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE ?
				f->bc.sampler_index_mode : f->bc.resource_index_mode;

			// Currently require prior opt passes to use one TEX per indexed op
			assert(f->parent->count() == 1);

			value *v = f->src.back(); // Last src is index offset
			assert(v);

			// emit the fetch clause now; the index-load ALU clause
			// emitted below ends up in front of it (bb is built front-first)
			cur_bb->push_front(c);

			load_index_register(v, index_mode);
			f->src.pop_back(); // Don't need index value any more

			return;
		}
	}

	// no indexed fetches - just emit the clause
	cur_bb->push_front(c);
}
    935 
// Schedule one ALU clause.  The clause is processed bottom-up: nodes
// whose results are still used later inside the clause are deferred to
// 'pending' until those uses are scheduled; nodes without local uses
// are released as scheduling candidates immediately.
void post_scheduler::process_alu(container_node *c) {

	if (c->empty())
		return;

	ucm.clear();
	alu.reset();

	// scheduling runs bottom-up, so start from the clause's live_after set
	live = c->live_after;

	init_globals(c->live_after, true);
	init_globals(c->live_before, true);

	init_regmap();

	update_local_interferences();

	// reverse walk with node removal, hence the precomputed N iterator
	for (node_riterator N, I = c->rbegin(), E = c->rend(); I != E; I = N) {
		N = I;
		++N;

		node *n = *I;
		unsigned uc = init_ucm(c, n);

		PSC_DUMP(
			sblog << "process_alu uc=" << uc << "  ";
			dump::dump_op(n);
			sblog << "  ";
		);

		if (uc) {
			// still has uses inside the clause - defer
			n->remove();

			pending.push_back(n);
			PSC_DUMP( sblog << "pending\n"; );
		} else {
			// no local uses - ready candidate
			release_op(n);
		}
	}

	schedule_alu(c);
}
    978 
    979 void post_scheduler::update_local_interferences() {
    980 
    981 	PSC_DUMP(
    982 		sblog << "update_local_interferences : ";
    983 		dump::dump_set(sh, live);
    984 		sblog << "\n";
    985 	);
    986 
    987 
    988 	for (val_set::iterator I = live.begin(sh), E = live.end(sh); I != E; ++I) {
    989 		value *v = *I;
    990 		if (v->is_prealloc())
    991 			continue;
    992 
    993 		v->interferences.add_set(live);
    994 	}
    995 }
    996 
// Make source values live (the block is processed bottom-up, so a use
// extends liveness upwards).  Values that just became live are added to
// 'born'; non-preallocated ones get their interference data cleared
// once per clause (tracked via cleared_interf) so it can be rebuilt.
// 'src' is false for dst vectors: then only rel components are handled.
void post_scheduler::update_live_src_vec(vvec &vv, val_set *born, bool src) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;

		if (!v)
			continue;

		if (src && v->is_any_gpr()) {
			if (live.add_val(v)) {
				if (!v->is_prealloc()) {
					if (!cleared_interf.contains(v)) {
						PSC_DUMP(
							sblog << "clearing interferences for " << *v << "\n";
						);
						v->interferences.clear();
						cleared_interf.add_val(v);
					}
				}
				if (born)
					born->add_val(v);
			}
		} else if (v->is_rel()) {
			// the address value itself plus everything possibly read
			// through the relative access
			if (!v->rel->is_any_gpr())
				live.add_val(v->rel);
			update_live_src_vec(v->muse, born, true);
		}
	}
}
   1025 
   1026 void post_scheduler::update_live_dst_vec(vvec &vv) {
   1027 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
   1028 		value *v = *I;
   1029 		if (!v)
   1030 			continue;
   1031 
   1032 		if (v->is_rel()) {
   1033 			update_live_dst_vec(v->mdef);
   1034 		} else if (v->is_any_gpr()) {
   1035 			if (!live.remove_val(v)) {
   1036 				PSC_DUMP(
   1037 						sblog << "failed to remove ";
   1038 				dump::dump_val(v);
   1039 				sblog << " from live : ";
   1040 				dump::dump_set(sh, live);
   1041 				sblog << "\n";
   1042 				);
   1043 			}
   1044 		}
   1045 	}
   1046 }
   1047 
// Update liveness across node n (processed bottom-up): defs are killed
// first, then sources - and rel components of dsts - become live.
// 'born' collects the values that became live at this node.
void post_scheduler::update_live(node *n, val_set *born) {
	update_live_dst_vec(n->dst);
	update_live_src_vec(n->src, born, true);
	update_live_src_vec(n->dst, born, false);
}
   1053 
// Finalize the current ALU group: recolor local values onto real
// registers, update liveness and interference information for every
// slot, then release source values whose uses are now all scheduled.
void post_scheduler::process_group() {
	alu_group_tracker &rt = alu.grp();

	val_set vals_born;

	recolor_locals();

	PSC_DUMP(
		sblog << "process_group: live_before : ";
		dump::dump_set(sh, live);
		sblog << "\n";
	);

	for (unsigned s = 0; s < ctx.num_slots; ++s) {
		alu_node *n = rt.slot(s);
		if (!n)
			continue;

		update_live(n, &vals_born);
	}

	PSC_DUMP(
		sblog << "process_group: live_after : ";
		dump::dump_set(sh, live);
		sblog << "\n";
	);

	update_local_interferences();

	// NOTE(review): this loop uses a hardcoded 5 while the one above
	// uses ctx.num_slots - equivalent only if slot 4 is always empty
	// when num_slots == 4; confirm before unifying
	for (unsigned i = 0; i < 5; ++i) {
		node *n = rt.slot(i);
		if (n && !n->is_mova()) {
			release_src_values(n);
		}
	}
}
   1090 
   1091 void post_scheduler::init_globals(val_set &s, bool prealloc) {
   1092 
   1093 	PSC_DUMP(
   1094 		sblog << "init_globals: ";
   1095 		dump::dump_set(sh, s);
   1096 		sblog << "\n";
   1097 	);
   1098 
   1099 	for (val_set::iterator I = s.begin(sh), E = s.end(sh); I != E; ++I) {
   1100 		value *v = *I;
   1101 		if (v->is_sgpr() && !v->is_global()) {
   1102 			v->set_global();
   1103 
   1104 			if (prealloc && v->is_fixed()) {
   1105 				v->set_prealloc();
   1106 			}
   1107 		}
   1108 	}
   1109 }
   1110 
// Emit loads for any pending CF index registers (IDX0/IDX1).  The group
// currently being built is discarded and the regmap rolled back so the
// group can be rebuilt after the index value is loaded.
void post_scheduler::emit_index_registers() {
	for (unsigned i = 0; i < 2; i++) {
		if (alu.current_idx[i]) {
			regmap = prev_regmap;
			alu.discard_current_group();

			load_index_register(alu.current_idx[i], KC_INDEX_0 + i);
			alu.current_idx[i] = NULL;
		}
	}
}
   1122 
// Flush the accumulated alu clause into the current basic block.  A
// pending AR value is loaded first (and that group processed/emitted) so
// the clause stays self-contained; pending CF index register loads are
// emitted afterwards.
void post_scheduler::emit_clause() {

	if (alu.current_ar) {
		emit_load_ar();
		process_group();
		alu.emit_group();
	}

	if (!alu.is_empty()) {
		alu.emit_clause(cur_bb);
	}

	emit_index_registers();
}
   1137 
// Main alu scheduling loop: repeatedly prepare groups from the ready
// lists and emit them, flushing the clause whenever group preparation
// fails on clause-level state (index registers, AR) or clause limits.
void post_scheduler::schedule_alu(container_node *c) {

	assert(!ready.empty() || !ready_copies.empty());

	while (1) {

		// remember the regmap so a failed attempt can roll back
		prev_regmap = regmap;

		if (!prepare_alu_group()) {
			if (alu.current_idx[0] || alu.current_idx[1]) {
				// group needs an index register load - emit the current
				// clause (which emits the loads) and retry
				regmap = prev_regmap;
				emit_clause();
				init_globals(live, false);

				continue;
			}

			if (alu.current_ar) {
				// make the AR value available, then retry
				emit_load_ar();
				continue;
			} else
				break;
		}

		if (!alu.check_clause_limits()) {
			// clause is full - flush it and rebuild the group
			regmap = prev_regmap;
			emit_clause();
			init_globals(live, false);

			continue;
		}

		process_group();
		alu.emit_group();
	};

	if (!alu.is_empty()) {
		emit_clause();
	}

	// at this point everything must have been scheduled
	if (!ready.empty()) {
		sblog << "##post_scheduler: unscheduled ready instructions :";
		dump::dump_op_list(&ready);
		assert(!"unscheduled ready instructions");
	}

	if (!pending.empty()) {
		sblog << "##post_scheduler: unscheduled pending instructions :";
		dump::dump_op_list(&pending);
		assert(!"unscheduled pending instructions");
	}
}
   1190 
   1191 void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
   1192 	unsigned chan = v->gpr.chan();
   1193 
   1194 	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh);
   1195 			I != E; ++I) {
   1196 		value *vi = *I;
   1197 		sel_chan gpr = vi->get_final_gpr();
   1198 
   1199 		if (vi->is_any_gpr() && gpr && vi != v &&
   1200 				(!v->chunk || v->chunk != vi->chunk) &&
   1201 				vi->is_fixed() && gpr.chan() == chan) {
   1202 
   1203 			unsigned r = gpr.sel();
   1204 
   1205 			PSC_DUMP(
   1206 				sblog << "\tadd_interferences: " << *vi << "\n";
   1207 			);
   1208 
   1209 			if (rb.size() <= r)
   1210 				rb.resize(r + 32);
   1211 			rb.set(r);
   1212 		}
   1213 	}
   1214 }
   1215 
// Write the chosen register/channel location into a single value.
void post_scheduler::set_color_local_val(value *v, sel_chan color) {
	v->gpr = color;

	PSC_DUMP(
		sblog << "     recolored: ";
		dump::dump_val(v);
		sblog << "\n";
	);
}
   1225 
   1226 void post_scheduler::set_color_local(value *v, sel_chan color) {
   1227 	if (v->chunk) {
   1228 		vvec &vv = v->chunk->values;
   1229 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
   1230 			value *v2 =*I;
   1231 			set_color_local_val(v2, color);
   1232 		}
   1233 		v->chunk->fix();
   1234 	} else {
   1235 		set_color_local_val(v, color);
   1236 		v->fix();
   1237 	}
   1238 }
   1239 
// Pick a register for a local sgpr value (and its coalesced chunk, if
// any) that doesn't clash with already-fixed interfering values on the
// same channel.  Temp gprs are tried first unless the value is global.
bool post_scheduler::recolor_local(value *v) {

	// bitset of register sels already taken on this channel
	sb_bitset rb;

	assert(v->is_sgpr());
	assert(!v->is_prealloc());
	assert(v->gpr);

	unsigned chan = v->gpr.chan();

	PSC_DUMP(
		sblog << "recolor_local: ";
		dump::dump_val(v);
		sblog << "   interferences: ";
		dump::dump_set(sh, v->interferences);
		sblog << "\n";
		if (v->chunk) {
			sblog << "     in chunk: ";
			coalescer::dump_chunk(v->chunk);
			sblog << "\n";
		}
	);

	// collect interferences from every chunk member (they will share the
	// same location), or just from v itself when not coalesced
	if (v->chunk) {
		for (vvec::iterator I = v->chunk->values.begin(),
				E = v->chunk->values.end(); I != E; ++I) {
			value *v2 = *I;

			PSC_DUMP( sblog << "   add_interferences for " << *v2 << " :\n"; );

			add_interferences(v, rb, v2->interferences);
		}
	} else {
		add_interferences(v, rb, v->interferences);
	}

	PSC_DUMP(
		unsigned sz = rb.size();
		sblog << "registers bits: " << sz;
		for (unsigned r = 0; r < sz; ++r) {
			if ((r & 7) == 0)
				sblog << "\n  " << r << "   ";
			sblog << (rb.get(r) ? 1 : 0);
		}
	);

	bool no_temp_gprs = v->is_global();
	unsigned rs, re, pass = no_temp_gprs ? 1 : 0;

	// pass 0 searches the temp gpr range, pass 1 the non-temp range;
	// global values skip straight to pass 1
	while (pass < 2) {

		if (pass == 0) {
			rs = sh.first_temp_gpr();
			re = MAX_GPR;
		} else {
			rs = 0;
			re = sh.num_nontemp_gpr();
		}

		for (unsigned reg = rs; reg < re; ++reg) {
			// a sel beyond the bitset size was never marked as taken
			if (reg >= rb.size() || !rb.get(reg)) {
				// color found
				set_color_local(v, sel_chan(reg, chan));
				return true;
			}
		}
		++pass;
	}

	// NOTE(review): on failure this still returns true after the assert;
	// in release builds callers can't detect the failure - confirm intent
	assert(!"recolor_local failed");
	return true;
}
   1312 
// Discard the group being built, roll back the regmap, and emit a MOVA
// group that loads the pending AR value so that instructions using AR
// can be scheduled afterwards.
void post_scheduler::emit_load_ar() {

	regmap = prev_regmap;
	alu.discard_current_group();

	alu_group_tracker &rt = alu.grp();
	alu_node *a = alu.create_ar_load(alu.current_ar, SEL_X);

	// reserving the AR load in the freshly discarded (empty) group is
	// expected to succeed; failure is only logged
	if (!rt.try_reserve(a)) {
		sblog << "can't emit AR load : ";
		dump::dump_op(a);
		sblog << "\n";
	}

	alu.current_ar = 0;
}
   1329 
// Remove a single dst value from the regmap.  Returns false when the
// group must be restarted (the value is the pending AR value, which
// triggers an AR load instead of being unmapped).
bool post_scheduler::unmap_dst_val(value *d) {

	if (d == alu.current_ar) {
		emit_load_ar();
		return false;
	}

	if (d->is_prealloc()) {
		sel_chan gpr = d->get_final_gpr();
		rv_map::iterator F = regmap.find(gpr);
		value *c = NULL;
		if (F != regmap.end())
			c = F->second;

		// the mapped location must hold this value or a member of the
		// same chunk - anything else is a scheduling bug
		if (c && c!=d && (!c->chunk || c->chunk != d->chunk)) {
			PSC_DUMP(
				sblog << "dst value conflict : ";
				dump::dump_val(d);
				sblog << "   regmap contains ";
				dump::dump_val(c);
				sblog << "\n";
			);
			assert(!"scheduler error");
			return false;
		} else if (c) {
			regmap.erase(F);
		}
	}
	return true;
}
   1360 
   1361 bool post_scheduler::unmap_dst(alu_node *n) {
   1362 	value *d = n->dst.empty() ? NULL : n->dst[0];
   1363 
   1364 	if (!d)
   1365 		return true;
   1366 
   1367 	if (!d->is_rel()) {
   1368 		if (d && d->is_any_reg()) {
   1369 
   1370 			if (d->is_AR()) {
   1371 				if (alu.current_ar != d) {
   1372 					sblog << "loading wrong ar value\n";
   1373 					assert(0);
   1374 				} else {
   1375 					alu.current_ar = NULL;
   1376 				}
   1377 
   1378 			} else if (d->is_any_gpr()) {
   1379 				if (!unmap_dst_val(d))
   1380 					return false;
   1381 			}
   1382 		}
   1383 	} else {
   1384 		for (vvec::iterator I = d->mdef.begin(), E = d->mdef.end();
   1385 				I != E; ++I) {
   1386 			d = *I;
   1387 			if (!d)
   1388 				continue;
   1389 
   1390 			assert(d->is_any_gpr());
   1391 
   1392 			if (!unmap_dst_val(d))
   1393 				return false;
   1394 		}
   1395 	}
   1396 	return true;
   1397 }
   1398 
   1399 bool post_scheduler::map_src_val(value *v) {
   1400 
   1401 	if (!v->is_prealloc())
   1402 		return true;
   1403 
   1404 	sel_chan gpr = v->get_final_gpr();
   1405 	rv_map::iterator F = regmap.find(gpr);
   1406 	value *c = NULL;
   1407 	if (F != regmap.end()) {
   1408 		c = F->second;
   1409 		if (!v->v_equal(c)) {
   1410 			PSC_DUMP(
   1411 				sblog << "can't map src value ";
   1412 				dump::dump_val(v);
   1413 				sblog << ", regmap contains ";
   1414 				dump::dump_val(c);
   1415 				sblog << "\n";
   1416 			);
   1417 			return false;
   1418 		}
   1419 	} else {
   1420 		regmap.insert(std::make_pair(gpr, v));
   1421 	}
   1422 	return true;
   1423 }
   1424 
// Map register locations for a value vector.  For source vectors this
// also records pending CF index (IDX0/IDX1) requirements from kcache
// operands and tracks the single AR value a clause may use for relative
// addressing.  Returns false when the node can't be placed in the
// current group/clause with the existing mappings.
bool post_scheduler::map_src_vec(vvec &vv, bool src) {
	if (src) {
		// Handle possible UBO indexing
		bool ubo_indexing[2] = { false, false };
		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
			value *v = *I;
			if (!v)
				continue;

			if (v->is_kcache()) {
				unsigned index_mode = v->select.kcache_index_mode();
				if (index_mode == KC_INDEX_0 || index_mode == KC_INDEX_1) {
					ubo_indexing[index_mode - KC_INDEX_0] = true;
				}
			}
		}

		// idx values stored at end of src vec, see bc_parser::prepare_alu_group
		for (unsigned i = 2; i != 0; i--) {
			if (ubo_indexing[i-1]) {
				// TODO: skip adding value to kcache reservation somehow, causes
				// unnecessary group breaks and cache line locks
				value *v = vv.back();
				// only one value may occupy an index register per clause
				if (alu.current_idx[i-1] && alu.current_idx[i-1] != v) {
					PSC_DUMP(
						sblog << "IDX" << i-1 << " already set to " <<
						*alu.current_idx[i-1] << ", trying to set " << *v << "\n";
					);
					return false;
				}

				alu.current_idx[i-1] = v;
				PSC_DUMP(sblog << "IDX" << i-1 << " set to " << *v << "\n";);
			}
		}
	}

	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		// only fixed gprs and relative values need mapping
		if ((!v->is_any_gpr() || !v->is_fixed()) && !v->is_rel())
			continue;

		if (v->is_rel()) {
			value *rel = v->rel;
			assert(rel);

			if (!rel->is_const()) {
				// map the possibly-used values of the indirect access
				if (!map_src_vec(v->muse, true))
					return false;

				// a clause can work with a single AR value only
				if (rel != alu.current_ar) {
					if (alu.current_ar) {
						PSC_DUMP(
							sblog << "  current_AR is " << *alu.current_ar
								<< "  trying to use " << *rel << "\n";
						);
						return false;
					}

					alu.current_ar = rel;

					PSC_DUMP(
						sblog << "  new current_AR assigned: " << *alu.current_ar
							<< "\n";
					);
				}
			}

		} else if (src) {
			if (!map_src_val(v)) {
				return false;
			}
		}
	}
	return true;
}
   1504 
   1505 bool post_scheduler::map_src(alu_node *n) {
   1506 	if (!map_src_vec(n->dst, false))
   1507 		return false;
   1508 
   1509 	if (!map_src_vec(n->src, true))
   1510 		return false;
   1511 
   1512 	return true;
   1513 }
   1514 
   1515 void post_scheduler::dump_regmap() {
   1516 
   1517 	sblog << "# REGMAP :\n";
   1518 
   1519 	for(rv_map::iterator I = regmap.begin(), E = regmap.end(); I != E; ++I) {
   1520 		sblog << "  # " << I->first << " => " << *(I->second) << "\n";
   1521 	}
   1522 
   1523 	if (alu.current_ar)
   1524 		sblog << "    current_AR: " << *alu.current_ar << "\n";
   1525 	if (alu.current_pr)
   1526 		sblog << "    current_PR: " << *alu.current_pr << "\n";
   1527 	if (alu.current_idx[0])
   1528 		sblog << "    current IDX0: " << *alu.current_idx[0] << "\n";
   1529 	if (alu.current_idx[1])
   1530 		sblog << "    current IDX1: " << *alu.current_idx[1] << "\n";
   1531 }
   1532 
   1533 void post_scheduler::recolor_locals() {
   1534 	alu_group_tracker &rt = alu.grp();
   1535 
   1536 	for (unsigned s = 0; s < ctx.num_slots; ++s) {
   1537 		alu_node *n = rt.slot(s);
   1538 		if (n) {
   1539 			value *d = n->dst[0];
   1540 			if (d && d->is_sgpr() && !d->is_prealloc()) {
   1541 				recolor_local(d);
   1542 			}
   1543 		}
   1544 	}
   1545 }
   1546 
   1547 // returns true if there are interferences
   1548 bool post_scheduler::check_interferences() {
   1549 
   1550 	alu_group_tracker &rt = alu.grp();
   1551 
   1552 	unsigned interf_slots;
   1553 
   1554 	bool discarded = false;
   1555 
   1556 	PSC_DUMP(
   1557 			sblog << "check_interferences: before: \n";
   1558 	dump_regmap();
   1559 	);
   1560 
   1561 	do {
   1562 
   1563 		interf_slots = 0;
   1564 
   1565 		for (unsigned s = 0; s < ctx.num_slots; ++s) {
   1566 			alu_node *n = rt.slot(s);
   1567 			if (n) {
   1568 				if (!unmap_dst(n)) {
   1569 					return true;
   1570 				}
   1571 			}
   1572 		}
   1573 
   1574 		for (unsigned s = 0; s < ctx.num_slots; ++s) {
   1575 			alu_node *n = rt.slot(s);
   1576 			if (n) {
   1577 				if (!map_src(n)) {
   1578 					interf_slots |= (1 << s);
   1579 				}
   1580 			}
   1581 		}
   1582 
   1583 		PSC_DUMP(
   1584 				for (unsigned i = 0; i < 5; ++i) {
   1585 					if (interf_slots & (1 << i)) {
   1586 						sblog << "!!!!!! interf slot: " << i << "  : ";
   1587 						dump::dump_op(rt.slot(i));
   1588 						sblog << "\n";
   1589 					}
   1590 				}
   1591 		);
   1592 
   1593 		if (!interf_slots)
   1594 			break;
   1595 
   1596 		PSC_DUMP( sblog << "ci: discarding slots " << interf_slots << "\n"; );
   1597 
   1598 		rt.discard_slots(interf_slots, alu.conflict_nodes);
   1599 		regmap = prev_regmap;
   1600 		discarded = true;
   1601 
   1602 	} while(1);
   1603 
   1604 	PSC_DUMP(
   1605 		sblog << "check_interferences: after: \n";
   1606 		dump_regmap();
   1607 	);
   1608 
   1609 	return discarded;
   1610 }
   1611 
   1612 // add instruction(s) (alu_node or contents of alu_packed_node) to current group
   1613 // returns the number of added instructions on success
   1614 unsigned post_scheduler::try_add_instruction(node *n) {
   1615 
   1616 	alu_group_tracker &rt = alu.grp();
   1617 
   1618 	unsigned avail_slots = rt.avail_slots();
   1619 
   1620 	// Cannot schedule in same clause as instructions using this index value
   1621 	if (!n->dst.empty() && n->dst[0] &&
   1622 		(n->dst[0] == alu.current_idx[0] || n->dst[0] == alu.current_idx[1])) {
   1623 		PSC_DUMP(sblog << "   CF_IDX source: " << *n->dst[0] << "\n";);
   1624 		return 0;
   1625 	}
   1626 
   1627 	if (n->is_alu_packed()) {
   1628 		alu_packed_node *p = static_cast<alu_packed_node*>(n);
   1629 		unsigned slots = p->get_slot_mask();
   1630 		unsigned cnt = __builtin_popcount(slots);
   1631 
   1632 		if ((slots & avail_slots) != slots) {
   1633 			PSC_DUMP( sblog << "   no slots \n"; );
   1634 			return 0;
   1635 		}
   1636 
   1637 		p->update_packed_items(ctx);
   1638 
   1639 		if (!rt.try_reserve(p)) {
   1640 			PSC_DUMP( sblog << "   reservation failed \n"; );
   1641 			return 0;
   1642 		}
   1643 
   1644 		p->remove();
   1645 		return cnt;
   1646 
   1647 	} else {
   1648 		alu_node *a = static_cast<alu_node*>(n);
   1649 		value *d = a->dst.empty() ? NULL : a->dst[0];
   1650 
   1651 		if (d && d->is_special_reg()) {
   1652 			assert((a->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
   1653 			d = NULL;
   1654 		}
   1655 
   1656 		unsigned allowed_slots = ctx.alu_slots_mask(a->bc.op_ptr);
   1657 		unsigned slot;
   1658 
   1659 		allowed_slots &= avail_slots;
   1660 
   1661 		if (!allowed_slots)
   1662 			return 0;
   1663 
   1664 		if (d) {
   1665 			slot = d->get_final_chan();
   1666 			a->bc.dst_chan = slot;
   1667 			allowed_slots &= (1 << slot) | 0x10;
   1668 		} else {
   1669 			if (a->bc.op_ptr->flags & AF_MOVA) {
   1670 				if (a->bc.slot_flags & AF_V)
   1671 					allowed_slots &= (1 << SLOT_X);
   1672 				else
   1673 					allowed_slots &= (1 << SLOT_TRANS);
   1674 			}
   1675 		}
   1676 
   1677 		// FIXME workaround for some problems with MULADD in trans slot on r700,
   1678 		// (is it really needed on r600?)
   1679 		if ((a->bc.op == ALU_OP3_MULADD || a->bc.op == ALU_OP3_MULADD_IEEE) &&
   1680 				!ctx.is_egcm()) {
   1681 			allowed_slots &= 0x0F;
   1682 		}
   1683 
   1684 		if (!allowed_slots) {
   1685 			PSC_DUMP( sblog << "   no suitable slots\n"; );
   1686 			return 0;
   1687 		}
   1688 
   1689 		slot = __builtin_ctz(allowed_slots);
   1690 		a->bc.slot = slot;
   1691 
   1692 		PSC_DUMP( sblog << "slot: " << slot << "\n"; );
   1693 
   1694 		if (!rt.try_reserve(a)) {
   1695 			PSC_DUMP( sblog << "   reservation failed\n"; );
   1696 			return 0;
   1697 		}
   1698 
   1699 		a->remove();
   1700 		return 1;
   1701 	}
   1702 }
   1703 
// Try to coalesce a copy mov so that no instruction has to be emitted.
// Returns true when the copy was handled here (coalesced, or must wait
// on regmap state), false when it must be scheduled as a regular op.
bool post_scheduler::check_copy(node *n) {
	if (!n->is_copy_mov())
		return false;

	value *s = n->src[0];
	value *d = n->dst[0];

	if (!s->is_sgpr() || !d->is_sgpr())
		return false;

	if (!s->is_prealloc()) {
		// give the source a register; coalescing only works if it ends
		// up in the same chunk as the dst
		recolor_local(s);

		if (!s->chunk || s->chunk != d->chunk)
			return false;
	}

	if (s->gpr == d->gpr) {

		PSC_DUMP(
			sblog << "check_copy: ";
			dump::dump_op(n);
			sblog << "\n";
		);

		rv_map::iterator F = regmap.find(d->gpr);
		bool gpr_free = (F == regmap.end());

		if (d->is_prealloc()) {
			// the dst location must currently be mapped to d (or a
			// member of its chunk), otherwise the copy can't be dropped
			// yet - keep it queued
			if (gpr_free) {
				PSC_DUMP( sblog << "    copy not ready...\n";);
				return true;
			}

			value *rv = F->second;
			if (rv != d && (!rv->chunk || rv->chunk != d->chunk)) {
				PSC_DUMP( sblog << "    copy not ready(2)...\n";);
				return true;
			}

			unmap_dst(static_cast<alu_node*>(n));
		}

		if (s->is_prealloc() && !map_src_val(s))
			return true;

		update_live(n, NULL);

		// src and dst share a location - the copy is a no-op, drop it
		release_src_values(n);
		n->remove();
		PSC_DUMP( sblog << "    copy coalesced...\n";);
		return true;
	}
	return false;
}
   1759 
   1760 void post_scheduler::dump_group(alu_group_tracker &rt) {
   1761 	for (unsigned i = 0; i < 5; ++i) {
   1762 		node *n = rt.slot(i);
   1763 		if (n) {
   1764 			sblog << "slot " << i << " : ";
   1765 			dump::dump_op(n);
   1766 			sblog << "\n";
   1767 		}
   1768 	}
   1769 }
   1770 
// Process queued copy movs: copies handled by check_copy() stay out of
// the generic ready list, the rest are moved onto it.  The outer loop
// repeats while a pass changes the tail of the queue, since coalescing
// one copy can unblock another.
void post_scheduler::process_ready_copies() {

	node *last;

	do {
		last = ready_copies.back();

		// N is precomputed because check_copy()/remove() may unlink I
		for (node_iterator N, I = ready_copies.begin(), E = ready_copies.end();
				I != E; I = N) {
			N = I; ++N;

			node *n = *I;

			if (!check_copy(n)) {
				n->remove();
				ready.push_back(n);
			}
		}
	} while (last != ready_copies.back());

	update_local_interferences();
}
   1793 
   1794 
// Build one alu group from the ready lists: add instructions until the
// slots fill up or nothing fits, then resolve interferences; repeat
// while conflict resolution keeps changing the group.  Returns the
// instruction count of the prepared group (0 = preparation failed).
bool post_scheduler::prepare_alu_group() {

	alu_group_tracker &rt = alu.grp();

	// iteration counter, used as a safety valve below
	unsigned i1 = 0;

	PSC_DUMP(
		sblog << "prepare_alu_group: starting...\n";
		dump_group(rt);
	);

	// retry instructions discarded from previous groups first
	ready.append_from(&alu.conflict_nodes);

	// FIXME rework this loop

	do {

		process_ready_copies();

		++i1;

		// N is precomputed because try_add_instruction() may unlink I
		for (node_iterator N, I = ready.begin(), E = ready.end(); I != E;
				I = N) {
			N = I; ++N;
			node *n = *I;

			PSC_DUMP(
				sblog << "p_a_g: ";
				dump::dump_op(n);
				sblog << "\n";
			);


			unsigned cnt = try_add_instruction(n);

			if (!cnt)
				continue;

			PSC_DUMP(
				sblog << "current group:\n";
				dump_group(rt);
			);

			if (rt.inst_count() == ctx.num_slots) {
				PSC_DUMP( sblog << " all slots used\n"; );
				break;
			}
		}

		// group unchanged by conflict resolution - we're done
		if (!check_interferences())
			break;

		// don't try to add more instructions to the group with mova if this
		// can lead to breaking clause slot count limit - we don't want mova to
		// end up in the end of the new clause instead of beginning of the
		// current clause.
		if (rt.has_ar_load() && alu.total_slots() > 121)
			break;

		// safety valve: accept any non-empty group after too many retries
		if (rt.inst_count() && i1 > 50)
			break;

		regmap = prev_regmap;

	} while (1);

	PSC_DUMP(
		sblog << " prepare_alu_group done, " << rt.inst_count()
	          << " slot(s) \n";

		sblog << "$$$$$$$$PAG i1=" << i1
				<< "  ready " << ready.count()
				<< "  pending " << pending.count()
				<< "  conflicting " << alu.conflict_nodes.count()
				<<"\n";

	);

	return rt.inst_count();
}
   1875 
// Release all values consumed by n: its sources, plus any relative-
// addressing operands carried in the dst vector.
void post_scheduler::release_src_values(node* n) {
	release_src_vec(n->src, true);
	release_src_vec(n->dst, false);
}
   1880 
   1881 void post_scheduler::release_op(node *n) {
   1882 	PSC_DUMP(
   1883 		sblog << "release_op ";
   1884 		dump::dump_op(n);
   1885 		sblog << "\n";
   1886 	);
   1887 
   1888 	n->remove();
   1889 
   1890 	if (n->is_copy_mov()) {
   1891 		ready_copies.push_back(n);
   1892 	} else if (n->is_mova() || n->is_pred_set()) {
   1893 		ready.push_front(n);
   1894 	} else {
   1895 		ready.push_back(n);
   1896 	}
   1897 }
   1898 
   1899 void post_scheduler::release_src_val(value *v) {
   1900 	node *d = v->any_def();
   1901 	if (d) {
   1902 		if (!--ucm[d])
   1903 			release_op(d);
   1904 	}
   1905 }
   1906 
   1907 void post_scheduler::release_src_vec(vvec& vv, bool src) {
   1908 
   1909 	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
   1910 		value *v = *I;
   1911 		if (!v || v->is_readonly())
   1912 			continue;
   1913 
   1914 		if (v->is_rel()) {
   1915 			release_src_val(v->rel);
   1916 			release_src_vec(v->muse, true);
   1917 
   1918 		} else if (src) {
   1919 			release_src_val(v);
   1920 		}
   1921 	}
   1922 }
   1923 
// Clear the tracked literal values and their use counts.
void literal_tracker::reset() {
	memset(lt, 0, sizeof(lt));
	memset(uc, 0, sizeof(uc));
}
   1928 
// Clear the per-cycle gpr read port table and its use counts.
void rp_gpr_tracker::reset() {
	memset(rp, 0, sizeof(rp));
	memset(uc, 0, sizeof(uc));
}
   1933 
// Clear the tracked kcache sels and their use counts.
void rp_kcache_tracker::reset() {
	memset(rp, 0, sizeof(rp));
	memset(uc, 0, sizeof(uc));
}
   1938 
// Clear the clause kcache entries and the accumulated line set.
void alu_kcache_tracker::reset() {
	memset(kc, 0, sizeof(kc));
	lines.clear();
}
   1943 
   1944 void alu_clause_tracker::reset() {
   1945 	group = 0;
   1946 	slot_count = 0;
   1947 	grp0.reset();
   1948 	grp1.reset();
   1949 }
   1950 
// Construct with empty clause state: no clause node yet, zero slot
// count, no pending AR/PR or CF index values, no exec-mask push.
alu_clause_tracker::alu_clause_tracker(shader &sh)
	: sh(sh), kt(sh.get_ctx().hw_class), slot_count(),
	  grp0(sh), grp1(sh),
	  group(), clause(),
	  push_exec_mask(),
	  current_ar(), current_pr(), current_idx() {}
   1957 
// Emit the current group into the clause (creating the clause node on
// demand) and flip to the spare group tracker.  A group that updates the
// exec mask forces the PUSH_BEFORE clause variant later.
void alu_clause_tracker::emit_group() {

	assert(grp().inst_count());

	alu_group_node *g = grp().emit();

	if (grp().has_update_exec_mask()) {
		assert(!push_exec_mask);
		push_exec_mask = true;
	}

	assert(g);

	if (!clause) {
		clause = sh.create_clause(NST_ALU_CLAUSE);
	}

	// groups are prepended to the clause node
	clause->push_front(g);

	slot_count += grp().slot_count();

	new_group();

	PSC_DUMP( sblog << "   #### group emitted\n"; );
}
   1983 
// Finish the clause: set up its kcache configuration, select the
// PUSH_BEFORE op if an exec-mask update was recorded, prepend the clause
// to the container and reset the per-clause state.
void alu_clause_tracker::emit_clause(container_node *c) {
	assert(clause);

	kt.init_clause(clause->bc);

	// AR/PR values must have been resolved within the clause
	assert(!current_ar);
	assert(!current_pr);

	if (push_exec_mask)
		clause->bc.set_op(CF_OP_ALU_PUSH_BEFORE);

	c->push_front(clause);

	clause = NULL;
	push_exec_mask = false;
	slot_count = 0;
	kt.reset();

	PSC_DUMP( sblog << "######### ALU clause emitted\n"; );
}
   2004 
   2005 bool alu_clause_tracker::check_clause_limits() {
   2006 
   2007 	alu_group_tracker &gt = grp();
   2008 
   2009 	unsigned slots = gt.slot_count();
   2010 
   2011 	// reserving slots to load AR and PR values
   2012 	unsigned reserve_slots = (current_ar ? 1 : 0) + (current_pr ? 1 : 0);
   2013 	// ...and index registers
   2014 	reserve_slots += (current_idx[0] != NULL) + (current_idx[1] != NULL);
   2015 
   2016 	if (slot_count + slots > MAX_ALU_SLOTS - reserve_slots)
   2017 		return false;
   2018 
   2019 	if (!kt.try_reserve(gt))
   2020 		return false;
   2021 
   2022 	return true;
   2023 }
   2024 
// Switch to the other of the two group trackers and clear it for the
// next group.
void alu_clause_tracker::new_group() {
	group = !group;
	grp().reset();
}
   2029 
   2030 bool alu_clause_tracker::is_empty() {
   2031 	return clause == NULL;
   2032 }
   2033 
// Copy the tracked literal constants (up to 4) into the group node; the
// literal array is terminated by the first empty entry.
void literal_tracker::init_group_literals(alu_group_node* g) {

	g->literals.clear();
	for (unsigned i = 0; i < 4; ++i) {
		if (!lt[i])
			break;

		g->literals.push_back(lt[i]);

		PSC_DUMP(
			sblog << "literal emitted: " << lt[i].f;
			sblog.print_zw_hex(lt[i].u, 8);
			sblog << "   " << lt[i].i << "\n";
		);
	}
}
   2050 
// Try to add the kcache lines used by the group to the clause-wide line
// set.  On failure the previous set is restored so the group can be
// emitted in a new clause instead.
bool alu_kcache_tracker::try_reserve(alu_group_tracker& gt) {
	rp_kcache_tracker &kt = gt.kcache();

	if (!kt.num_sels())
		return true;

	sb_set<unsigned> group_lines;

	unsigned nl = kt.get_lines(group_lines);
	assert(nl);

	// keep a copy of the current set for rollback
	sb_set<unsigned> clause_lines(lines);
	lines.add_set(group_lines);

	// no new lines needed - nothing to reconfigure
	if (clause_lines.size() == lines.size())
		return true;

	if (update_kc())
		return true;

	// the new lines don't fit into the kcache banks - roll back
	lines = clause_lines;

	return false;
}
   2075 
// Convert the tracked kcache const addresses into cache line ids (index
// mode kept in the top 3 bits) and insert them into 'lines'.  Returns
// the number of lines newly added.
unsigned rp_kcache_tracker::get_lines(kc_lines& lines) {
	unsigned cnt = 0;

	for (unsigned i = 0; i < sel_count; ++i) {
		unsigned line = rp[i] & 0x1fffffffu;
		unsigned index_mode = rp[i] >> 29;

		// entries are packed from the start; 0 marks an empty slot and
		// terminates the list
		if (!line)
			return cnt;

		// stored values are biased by 1 so that 0 can mean "empty"
		--line;
		// line granularity depends on sel_count (hw-class dependent -
		// see the rp layout in rp_kcache_tracker)
		line = (sel_count == 2) ? line >> 5 : line >> 6;
		line |= index_mode << 29;

		if (lines.insert(line).second)
			++cnt;
	}
	return cnt;
}
   2095 
// Rebuild the kcache bank/addr entries (kc[]) from the accumulated line
// set, merging two adjacent lines of the same bank and index mode into a
// single KC_LOCK_2 entry.  Returns false (restoring the previous
// entries) when more than max_kcs entries would be required.
bool alu_kcache_tracker::update_kc() {
	unsigned c = 0;

	bc_kcache old_kc[4];
	memcpy(old_kc, kc, sizeof(kc));

	for (kc_lines::iterator I = lines.begin(), E = lines.end(); I != E; ++I) {
		// unpack: top 3 bits = index mode, bits 8.. = bank, low 8 = addr
		unsigned index_mode = *I >> 29;
		unsigned line = *I & 0x1fffffffu;
		unsigned bank = line >> 8;

		assert(index_mode <= KC_INDEX_INVALID);
		line &= 0xFF;

		// extend the previous entry to a 2-line lock when contiguous
		if (c && (bank == kc[c-1].bank) && (kc[c-1].addr + 1 == line) &&
			kc[c-1].index_mode == index_mode)
		{
			kc[c-1].mode = KC_LOCK_2;
		} else {
			if (c == max_kcs) {
				memcpy(kc, old_kc, sizeof(kc));
				return false;
			}

			kc[c].mode = KC_LOCK_1;

			kc[c].bank = bank;
			kc[c].addr = line;
			kc[c].index_mode = index_mode;
			++c;
		}
	}
	return true;
}
   2130 
// Build a MOVA instruction that loads v into AR - or, on cayman, into a
// CF index register when ar_channel selects a non-X channel.  The
// GPR-based MOVA variant is used on chips that require it.
alu_node* alu_clause_tracker::create_ar_load(value *v, chan_select ar_channel) {
	alu_node *a = sh.create_alu();

	if (sh.get_ctx().uses_mova_gpr) {
		a->bc.set_op(ALU_OP1_MOVA_GPR_INT);
		a->bc.slot = SLOT_TRANS;
	} else {
		a->bc.set_op(ALU_OP1_MOVA_INT);
		a->bc.slot = SLOT_X;
	}
	a->bc.dst_chan = ar_channel;
	// cayman routes IDX0/IDX1 loads through special MOVA dst gprs
	if (ar_channel != SEL_X && sh.get_ctx().is_cayman()) {
		a->bc.dst_gpr = ar_channel == SEL_Y ? CM_V_SQ_MOVA_DST_CF_IDX0 : CM_V_SQ_MOVA_DST_CF_IDX1;
	}

	a->dst.resize(1);
	a->src.push_back(v);

	PSC_DUMP(
		sblog << "created AR load: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	return a;
}
   2157 
// Drop all instructions from the group being built; they are stashed in
// conflict_nodes so the scheduler can retry them later.
void alu_clause_tracker::discard_current_group() {
	PSC_DUMP( sblog << "act::discard_current_group\n"; );
	grp().discard_all_slots(conflict_nodes);
}
   2162 
   2163 void rp_gpr_tracker::dump() {
   2164 	sblog << "=== gpr_tracker dump:\n";
   2165 	for (int c = 0; c < 3; ++c) {
   2166 		sblog << "cycle " << c << "      ";
   2167 		for (int h = 0; h < 4; ++h) {
   2168 			sblog << rp[c][h] << ":" << uc[c][h] << "   ";
   2169 		}
   2170 		sblog << "\n";
   2171 	}
   2172 }
   2173 
   2174 } // namespace r600_sb
   2175