/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define GCM_DEBUG 0

#if GCM_DEBUG
#define GCM_DUMP(a) do { a } while (0)
#else
#define GCM_DUMP(a)
#endif

#include <map>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "eg_sq.h" // V_SQ_CF_INDEX_NONE
namespace r600_sb {

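/* Global code motion (GCM), in the spirit of Cliff Click's algorithm:
 * a top-down "early" pass places each op as soon as all of its inputs
 * are available, recording the earliest legal block (op_info::top_bb);
 * a bottom-up "late" pass then sinks each op toward its uses, recording
 * the latest legal block (op_info::bottom_bb), and bu_find_best_bb picks
 * a block between the two with the lowest loop nesting level.
 */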
int gcm::run() {

	GCM_DUMP( sblog << "==== GCM ==== \n"; sh.dump_ir(); );

	collect_instructions(sh.root, true);

	init_def_count(uses, pending);

	for (node_iterator N, I = pending.begin(), E = pending.end();
			I != E; I = N) {
		N = I;
		++N;
		node *o = *I;

		GCM_DUMP(
			sblog << "pending : ";
			dump::dump_op(o);
			sblog << "\n";
		);

		if (td_is_ready(o)) {

			GCM_DUMP(
				sblog << "  ready: ";
				dump::dump_op(o);
				sblog << "\n";
			);
			pending.remove_node(o);
			ready.push_back(o);
		}
	}

	sched_early(sh.root);

	if (!pending.empty()) {
		sblog << "##### gcm_sched_early_pass: unscheduled ops:\n";
		dump::dump_op(pending.front());
	}

	assert(pending.empty());

	GCM_DUMP( sh.dump_ir(); );

	GCM_DUMP( sblog << "\n\n ############## gcm late\n\n"; );

	collect_instructions(sh.root, false);

	init_use_count(uses, pending);

	sched_late(sh.root);
	if (!pending.empty()) {
		sblog << "##### gcm_sched_late_pass: unscheduled ops:\n";
		dump::dump_op(pending.front());
	}

	assert(ucs_level == 0);
	assert(pending.empty());

	return 0;
}

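/* Collect the instructions to be scheduled: the contents of every basic
 * block are moved into the `pending` list. On the early pass, ops marked
 * NF_DONT_MOVE are pinned in place by setting both top_bb and bottom_bb
 * to their current block.
 */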
void gcm::collect_instructions(container_node *c, bool early_pass) {
	if (c->is_bb()) {

		if (early_pass) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				node *n = *I;
				if (n->flags & NF_DONT_MOVE) {
					op_info &o = op_map[n];
					o.top_bb = o.bottom_bb = static_cast<bb_node*>(c);
				}
			}
		}

		pending.append_from(c);
		return;
	}

	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		if (I->is_container()) {
			collect_instructions(static_cast<container_node*>(*I), early_pass);
		}
	}
}

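/* Top-down pass: walk the IR in source order. Loop phi values are
 * released first so the loop body sees them as available, basic blocks
 * are filled by td_sched_bb from the `ready` queue, and nested regions
 * are processed recursively; region phis are released after the body.
 */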
void gcm::sched_early(container_node *n) {

	region_node *r =
			(n->type == NT_REGION) ? static_cast<region_node*>(n) : NULL;

	if (r && r->loop_phi) {
		sched_early(r->loop_phi);
	}

	for (node_iterator I = n->begin(), E = n->end(); I != E; ++I) {
		if (I->type == NT_OP) {
			node *op = *I;
			if (op->subtype == NST_PHI) {
				td_release_uses(op->dst);
			}
		} else if (I->is_container()) {
			if (I->subtype == NST_BB) {
				bb_node* bb = static_cast<bb_node*>(*I);
				td_sched_bb(bb);
			} else {
				sched_early(static_cast<container_node*>(*I));
			}
		}
	}

	if (r && r->phi) {
		sched_early(r->phi);
	}
}

void gcm::td_schedule(bb_node *bb, node *n) {
	GCM_DUMP(
		sblog << "scheduling : ";
		dump::dump_op(n);
		sblog << "\n";
	);
	td_release_uses(n->dst);

	bb->push_back(n);

	op_map[n].top_bb = bb;
}

void gcm::td_sched_bb(bb_node* bb) {
	GCM_DUMP(
		sblog << "td scheduling BB_" << bb->id << "\n";
	);

	while (!ready.empty()) {
		for (sq_iterator N, I = ready.begin(), E = ready.end(); I != E;
				I = N) {
			N = I; ++N;
			td_schedule(bb, *I);
			ready.erase(I);
		}
	}
}

bool gcm::td_is_ready(node* n) {
	return uses[n] == 0;
}

void gcm::td_release_val(value *v) {

	GCM_DUMP(
		sblog << "td checking uses: ";
		dump::dump_val(v);
		sblog << "\n";
	);

	for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) {
		use_info *u = *I;
		if (u->op->parent != &pending) {
			continue;
		}

		GCM_DUMP(
			sblog << "td    used in ";
			dump::dump_op(u->op);
			sblog << "\n";
		);

		assert(uses[u->op] > 0);
		if (--uses[u->op] == 0) {
			GCM_DUMP(
				sblog << "td        released : ";
				dump::dump_op(u->op);
				sblog << "\n";
			);

			pending.remove_node(u->op);
			ready.push_back(u->op);
		}
	}
}

void gcm::td_release_uses(vvec& vv) {
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (v->is_rel())
			td_release_uses(v->mdef);
		else
			td_release_val(v);
	}
}

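/* Bottom-up pass: walk the IR in reverse order. Each depart/repeat branch
 * gets its own use-count frame (push_uc_stack) so that uses seen on one
 * divergent path don't prematurely release a def needed on another, and
 * the phi operands corresponding to the branch (dep_id/rep_id) are queued
 * via bu_release_phi_defs. IF conditions and loop phi inputs are queued
 * as pending_defs for the enclosing block.
 */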
void gcm::sched_late(container_node *n) {

	bool stack_pushed = false;

	if (n->is_depart()) {
		depart_node *d = static_cast<depart_node*>(n);
		push_uc_stack();
		stack_pushed = true;
		bu_release_phi_defs(d->target->phi, d->dep_id);
	} else if (n->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(n);
		assert(r->target->loop_phi);
		push_uc_stack();
		stack_pushed = true;
		bu_release_phi_defs(r->target->loop_phi, r->rep_id);
	}

	for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
		if (I->is_container()) {
			if (I->subtype == NST_BB) {
				bb_node* bb = static_cast<bb_node*>(*I);
				bu_sched_bb(bb);
			} else {
				sched_late(static_cast<container_node*>(*I));
			}
		}
	}

	if (n->type == NT_IF) {
		if_node *f = static_cast<if_node*>(n);
		if (f->cond)
			pending_defs.push_back(f->cond);
	} else if (n->type == NT_REGION) {
		region_node *r = static_cast<region_node*>(n);
		if (r->loop_phi)
			bu_release_phi_defs(r->loop_phi, 0);
	}

	if (stack_pushed)
		pop_uc_stack();
}

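/* Bottom-up scheduling of a single basic block. Ready instructions are
 * kept in per-queue lists (CF/ALU/TEX/VTX); the main loop groups
 * consecutive instructions of one queue into a clause and applies a few
 * heuristics: it prefers ALU work while register pressure is low,
 * switches to fetch when pressure grows, breaks fetch clauses that would
 * exceed the hardware limit (ctx.max_fetch), and breaks CF runs when
 * enough ALU instructions are ready.
 */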
void gcm::bu_sched_bb(bb_node* bb) {
	GCM_DUMP(
		sblog << "bu scheduling BB_" << bb->id << "\n";
	);

	bu_bb = bb;

	if (!pending_nodes.empty()) {
		GCM_DUMP(
			sblog << "pending nodes:\n";
		);

		// TODO consider sorting the exports by array_base,
		// it may improve performance

		for (node_list::iterator I = pending_nodes.begin(),
				E = pending_nodes.end(); I != E; ++I) {
			bu_release_op(*I);
		}
		pending_nodes.clear();
		GCM_DUMP(
			sblog << "pending nodes processed...\n";
		);
	}

	if (!pending_defs.empty()) {
		for (vvec::iterator I = pending_defs.begin(), E = pending_defs.end();
				I != E; ++I) {
			bu_release_val(*I);
		}
		pending_defs.clear();
	}

	for (sched_queue::iterator N, I = ready_above.begin(), E = ready_above.end();
			I != E; I = N) {
		N = I;
		++N;
		node *n = *I;
		if (op_map[n].bottom_bb == bb) {
			add_ready(*I);
			ready_above.erase(I);
		}
	}

	unsigned cnt_ready[SQ_NUM];

	container_node *clause = NULL;
	unsigned last_inst_type = ~0u;
	unsigned last_count = 0;

	bool s = true;
	while (s) {
		node *n;

		s = false;

		unsigned ready_mask = 0;

		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
			if (!bu_ready[sq].empty() || !bu_ready_next[sq].empty())
				ready_mask |= (1 << sq);
		}

		if (!ready_mask) {
			for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {
				if (!bu_ready_early[sq].empty()) {
					node *n = bu_ready_early[sq].front();
					bu_ready_early[sq].pop_front();
					bu_ready[sq].push_back(n);
					break;
				}
			}
		}

		for (unsigned sq = SQ_CF; sq < SQ_NUM; ++sq) {

			if (sq == SQ_CF && pending_exec_mask_update) {
				// an exec mask update is pending - skip CF this round and
				// restart the scan at SQ_ALU (the for loop increments sq)
				pending_exec_mask_update = false;
				sq = SQ_ALU;
				--sq;
				continue;
			}

			if (!bu_ready_next[sq].empty())
				bu_ready[sq].splice(bu_ready[sq].end(), bu_ready_next[sq]);

			cnt_ready[sq] = bu_ready[sq].size();

			if ((sq == SQ_TEX || sq == SQ_VTX) && live_count <= rp_threshold &&
					cnt_ready[sq] < ctx.max_fetch/2 &&
					!bu_ready_next[SQ_ALU].empty()) {
				// not enough fetch instructions to fill half a fetch clause
				// and register pressure is low - prefer ALU work, restart
				// the scan at SQ_ALU
				sq = SQ_ALU;
				--sq;
				continue;
			}

			while (!bu_ready[sq].empty()) {

				if (last_inst_type != sq) {
					clause = NULL;
					last_count = 0;
					last_inst_type = sq;
				}

				// simple heuristic to limit register pressure
				if (sq == SQ_ALU && live_count > rp_threshold &&
						(!bu_ready[SQ_TEX].empty() ||
						 !bu_ready[SQ_VTX].empty() ||
						 !bu_ready_next[SQ_TEX].empty() ||
						 !bu_ready_next[SQ_VTX].empty())) {
					GCM_DUMP( sblog << "switching to fetch (regpressure)\n"; );
					break;
				}

				n = bu_ready[sq].front();

				// real count (e.g. SAMPLE_G will be expanded to 3 instructions:
				// 2 SET_GRAD_* + 1 SAMPLE_G)
				unsigned ncnt = 1;
				if (n->is_fetch_inst() && n->src.size() == 12) {
					ncnt = 3;
				}

				bool sampler_indexing = false;
				if (n->is_fetch_inst() &&
					static_cast<fetch_node *>(n)->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE)
				{
					sampler_indexing = true; // sampler-indexed ops get their own clause
					ncnt = sh.get_ctx().is_cayman() ? 2 : 3; // MOVA + SET_CF_IDX0/1
				}

				if ((sq == SQ_TEX || sq == SQ_VTX) &&
						((last_count >= ctx.max_fetch/2 &&
						check_alu_ready_count(24)) ||
								last_count + ncnt > ctx.max_fetch))
					break;
				else if (sq == SQ_CF && last_count > 4 &&
						check_alu_ready_count(24))
					break;

				bu_ready[sq].pop_front();

				if (sq != SQ_CF) {
					if (!clause || sampler_indexing) {
						clause = sh.create_clause(sq == SQ_ALU ?
								NST_ALU_CLAUSE :
									sq == SQ_TEX ? NST_TEX_CLAUSE :
											NST_VTX_CLAUSE);
						bb->push_front(clause);
					}
				} else {
					clause = bb;
				}

				bu_schedule(clause, n);
				s = true;
				last_count += ncnt;
			}
		}
	}

	bu_bb = NULL;

	GCM_DUMP(
		sblog << "bu finished scheduling BB_" << bb->id << "\n";
	);
}

void gcm::bu_release_defs(vvec& vv, bool src) {
	for (vvec::reverse_iterator I = vv.rbegin(), E = vv.rend(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			if (!v->rel->is_readonly())
				bu_release_val(v->rel);
			bu_release_defs(v->muse, true);
		} else if (src)
			bu_release_val(v);
		else {
			if (live.remove_val(v)) {
				--live_count;
			}
		}
	}
}

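/* The use-count stack (nuc_stk) tracks, per nesting level of divergent
 * control flow, how many uses of each pending node have already been
 * scheduled. A separate frame is pushed for every depart/repeat branch
 * and merged back into the parent frame by pop_uc_stack, so a node only
 * becomes ready once the counts from all paths add up to its total use
 * count.
 */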
void gcm::push_uc_stack() {
	GCM_DUMP(
		sblog << "pushing use count stack prev_level " << ucs_level
			<< "   new level " << (ucs_level + 1) << "\n";
	);
	++ucs_level;
	if (ucs_level == nuc_stk.size()) {
		nuc_stk.resize(ucs_level + 1);
	} else {
		nuc_stk[ucs_level].clear();
	}
}

bool gcm::bu_is_ready(node* n) {
	nuc_map &cm = nuc_stk[ucs_level];
	nuc_map::iterator F = cm.find(n);
	unsigned uc = (F == cm.end() ? 0 : F->second);
	return uc == uses[n];
}

void gcm::bu_schedule(container_node* c, node* n) {
	GCM_DUMP(
		sblog << "bu scheduling : ";
		dump::dump_op(n);
		sblog << "\n";
	);

	assert(op_map[n].bottom_bb == bu_bb);

	bu_release_defs(n->src, true);
	bu_release_defs(n->dst, false);

	c->push_front(n);
}

void gcm::dump_uc_stack() {
	sblog << "##### uc_stk start ####\n";
	for (unsigned l = 0; l <= ucs_level; ++l) {
		nuc_map &m = nuc_stk[l];

		sblog << "nuc_stk[" << l << "] :   @" << &m << "\n";

		for (nuc_map::iterator I = m.begin(), E = m.end(); I != E; ++I) {
			sblog << "    uc " << I->second << " for ";
			dump::dump_op(I->first);
			sblog << "\n";
		}
	}
	sblog << "##### uc_stk end ####\n";
}

void gcm::pop_uc_stack() {
	nuc_map &pm = nuc_stk[ucs_level];
	--ucs_level;
	nuc_map &cm = nuc_stk[ucs_level];

	GCM_DUMP(
		sblog << "merging use stack from level " << (ucs_level + 1)
			<< " to " << ucs_level << "\n";
	);

	for (nuc_map::iterator I = pm.begin(), E = pm.end(); I != E; ++I) {
		node *n = I->first;

		GCM_DUMP(
			sblog << "      " << cm[n] << " += " << I->second << "  for ";
			dump::dump_op(n);
			sblog << "\n";
		);

		unsigned uc = cm[n] += I->second;

		if (n->parent == &pending && uc == uses[n]) {
			cm.erase(n);
			pending_nodes.push_back(n);
			GCM_DUMP(
				sblog << "pushed pending_node due to stack pop ";
				dump::dump_op(n);
				sblog << "\n";
			);
		}
	}
}

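/* Choose the final placement for n between its earliest block (top_bb,
 * recorded by the early pass) and the current bottom-up block (bu_bb):
 * walk up the IR from bu_bb toward top_bb and pick the enclosing basic
 * block with the smallest loop nesting level.
 */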
void gcm::bu_find_best_bb(node *n, op_info &oi) {

	GCM_DUMP(
		sblog << "  find best bb : ";
		dump::dump_op(n);
		sblog << "\n";
	);

	if (oi.bottom_bb)
		return;

	// don't hoist generated copies
	if (n->flags & NF_DONT_HOIST) {
		oi.bottom_bb = bu_bb;
		return;
	}

	bb_node* best_bb = bu_bb;
	bb_node* top_bb = oi.top_bb;
	assert(oi.top_bb && !oi.bottom_bb);

	node *c = best_bb;

	// FIXME top_bb may be located inside a loop, so we'd never reach it
	// in the loop below, and the instruction would be incorrectly placed
	// at the beginning of the shader.
	// For now just check if top_bb's loop_level is higher than that of the
	// current bb and abort the search for a better bb in that case,
	// but this problem may require a more complete (and more expensive) fix.
	if (top_bb->loop_level <= best_bb->loop_level) {
		while (c && c != top_bb) {

			if (c->prev) {
				c = c->prev;
			} else {
				c = c->parent;
				if (!c)
					break;
				continue;
			}

			if (c->subtype == NST_BB) {
				bb_node *bb = static_cast<bb_node*>(c);
				if (bb->loop_level < best_bb->loop_level)
					best_bb = bb;
			}
		}
	}

	oi.bottom_bb = best_bb;
}

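/* Route a ready node to the appropriate scheduling queue. Ops flagged
 * NF_SCHEDULE_EARLY are held back until nothing else is ready; ALU copy
 * movs jump to the front of the ready queue (presumably to place them
 * close to their uses); everything else waits for the next round in
 * bu_ready_next. A PRED_SET that writes the exec mask additionally
 * raises pending_exec_mask_update.
 */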
void gcm::add_ready(node *n) {
	sched_queue_id sq = sh.get_queue_id(n);
	if (n->flags & NF_SCHEDULE_EARLY)
		bu_ready_early[sq].push_back(n);
	else if (sq == SQ_ALU && n->is_copy_mov())
		bu_ready[sq].push_front(n);
	else if (n->is_alu_inst()) {
		alu_node *a = static_cast<alu_node*>(n);
		if (a->bc.op_ptr->flags & AF_PRED && a->dst[2]) {
			// PRED_SET instruction that updates the exec mask
			pending_exec_mask_update = true;
		}
		bu_ready_next[sq].push_back(n);
	} else
		bu_ready_next[sq].push_back(n);
}

void gcm::bu_release_op(node *n) {
	op_info &oi = op_map[n];

	GCM_DUMP(
		sblog << "  bu release op  ";
		dump::dump_op(n);
	);

	nuc_stk[ucs_level].erase(n);
	pending.remove_node(n);

	bu_find_best_bb(n, oi);

	if (oi.bottom_bb == bu_bb) {
		GCM_DUMP( sblog << "   ready\n"; );
		add_ready(n);
	} else {
		GCM_DUMP( sblog << "   ready_above\n"; );
		ready_above.push_back(n);
	}
}

void gcm::bu_release_phi_defs(container_node* p, unsigned op)
{
	for (node_riterator I = p->rbegin(), E = p->rend(); I != E; ++I) {
		node *o = *I;
		value *v = o->src[op];
		if (v && !v->is_readonly())
			pending_defs.push_back(v);
	}
}

unsigned gcm::get_uc_vec(vvec &vv) {
	unsigned c = 0;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v)
			continue;

		if (v->is_rel())
			c += get_uc_vec(v->mdef);
		else
			c += v->use_count();
	}
	return c;
}

void gcm::init_use_count(nuc_map& m, container_node &s) {
	m.clear();
	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
		node *n = *I;
		unsigned uc = get_uc_vec(n->dst);
		GCM_DUMP(
			sblog << "uc " << uc << "  ";
			dump::dump_op(n);
			sblog << "\n";
		);
		if (!uc) {
			pending_nodes.push_back(n);
			GCM_DUMP(
				sblog << "pushed pending_node in init ";
				dump::dump_op(n);
				sblog << "\n";
			);
		} else
			m[n] = uc;
	}
}

void gcm::bu_release_val(value* v) {
	node *n = v->any_def();

	if (n && n->parent == &pending) {
		nuc_map &m = nuc_stk[ucs_level];
		unsigned uc = ++m[n];   // uses scheduled so far at this level
		unsigned uc2 = uses[n]; // total use count

		if (live.add_val(v)) {
			++live_count;
			GCM_DUMP( sblog << "live_count: " << live_count << "\n"; );
		}

		GCM_DUMP(
			sblog << "release val ";
			dump::dump_val(v);
			sblog << "  for node ";
			dump::dump_op(n);
			sblog << "    new uc=" << uc << ", total " << uc2 << "\n";
		);

		if (uc == uc2)
			bu_release_op(n);
	}
}

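/* For the early pass every node is keyed by the number of values it
 * depends on (definitions of its sources plus relative-addressing
 * values); a node becomes ready when this count drops to zero. The late
 * pass uses the symmetric use counts computed by init_use_count above.
 */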
void gcm::init_def_count(nuc_map& m, container_node& s) {
	m.clear();
	for (node_iterator I = s.begin(), E = s.end(); I != E; ++I) {
		node *n = *I;
		unsigned dc = get_dc_vec(n->src, true) + get_dc_vec(n->dst, false);
		m[n] = dc;

		GCM_DUMP(
			sblog << "dc " << dc << "  ";
			dump::dump_op(n);
			sblog << "\n";
		);
	}
}

unsigned gcm::get_dc_vec(vvec& vv, bool src) {
	unsigned c = 0;
	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
		value *v = *I;
		if (!v || v->is_readonly())
			continue;

		if (v->is_rel()) {
			c += v->rel->def != NULL;
			c += get_dc_vec(v->muse, true);
		} else if (src) {
			c += v->def != NULL;
			c += v->adef != NULL;
		}
	}
	return c;
}

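/* Count the "real" ALU instructions in a ready queue, up to `max`:
 * GPR-to-GPR copy movs are skipped (presumably because they are likely
 * to be coalesced away), and a packed ALU op counts once per slot.
 * check_alu_ready_count uses this to decide whether enough ALU work has
 * accumulated to justify ending a fetch or CF run.
 */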
unsigned gcm::real_alu_count(sched_queue& q, unsigned max) {
	sq_iterator I(q.begin()), E(q.end());
	unsigned c = 0;

	while (I != E && c < max) {
		node *n = *I;
		if (n->is_alu_inst()) {
			if (!n->is_copy_mov() || !n->src[0]->is_any_gpr())
				++c;
		} else if (n->is_alu_packed()) {
			c += static_cast<container_node*>(n)->count();
		}
		++I;
	}

	return c;
}

bool gcm::check_alu_ready_count(unsigned threshold) {
	unsigned r = real_alu_count(bu_ready[SQ_ALU], threshold);
	if (r >= threshold)
		return true;
	r += real_alu_count(bu_ready_next[SQ_ALU], threshold - r);
	return r >= threshold;
}

} // namespace r600_sb