Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #include "sb_bc.h"
     28 #include "sb_shader.h"
     29 #include "sb_pass.h"
     30 
     31 namespace r600_sb {
     32 
     33 shader::shader(sb_context &sctx, shader_target t, unsigned id)
     34 : ctx(sctx), next_temp_value_index(temp_regid_offset),
     35   prep_regs_count(), pred_sels(),
     36   regions(), inputs(), undef(), val_pool(sizeof(value)),
     37   pool(), all_nodes(), src_stats(), opt_stats(), errors(),
     38   optimized(), id(id),
     39   coal(*this), bbs(),
     40   target(t), vt(ex), ex(*this), root(),
     41   compute_interferences(),
     42   has_alu_predication(),
     43   uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {}
     44 
     45 bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {
     46 
     47 	unsigned slot_flags = ctx.alu_slots(n->bc.op);
     48 	unsigned slot = n->bc.dst_chan;
     49 
     50 	if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
     51 			(slot_flags & AF_S))
     52 		slot = SLOT_TRANS;
     53 
     54 	if (slots[slot])
     55 		return false;
     56 
     57 	n->bc.slot = slot;
     58 	slots[slot] = n;
     59 	return true;
     60 }
     61 
     62 void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
     63                             bool src) {
     64 	unsigned chan = 0;
     65 	while (comp_mask) {
     66 		if (comp_mask & 1) {
     67 			value *v = get_gpr_value(src, gpr, chan, false);
     68 			v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
     69 			if (!v->is_rel()) {
     70 				v->gpr = v->pin_gpr = v->select;
     71 				v->fix();
     72 			}
     73 			if (v->array && !v->array->gpr) {
     74 				// if pinned value can be accessed with indirect addressing
     75 				// pin the entire array to its original location
     76 				v->array->gpr = v->array->base_gpr;
     77 			}
     78 			vec.push_back(v);
     79 		}
     80 		comp_mask >>= 1;
     81 		++chan;
     82 	}
     83 }
     84 
     85 cf_node* shader::create_clause(node_subtype nst) {
     86 	cf_node *n = create_cf();
     87 
     88 	n->subtype = nst;
     89 
     90 	switch (nst) {
     91 	case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
     92 	case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
     93 	case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
     94 	case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break;
     95 	default: assert(!"invalid clause type"); break;
     96 	}
     97 
     98 	n->bc.barrier = 1;
     99 	return n;
    100 }
    101 
    102 void shader::create_bbs() {
    103 	create_bbs(root, bbs);
    104 }
    105 
    106 void shader::expand_bbs() {
    107 	expand_bbs(bbs);
    108 }
    109 
    110 alu_node* shader::create_mov(value* dst, value* src) {
    111 	alu_node *n = create_alu();
    112 	n->bc.set_op(ALU_OP1_MOV);
    113 	n->dst.push_back(dst);
    114 	n->src.push_back(src);
    115 	dst->def = n;
    116 
    117 	return n;
    118 }
    119 
    120 alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
    121 	alu_node *n = create_mov(dst, src);
    122 
    123 	dst->assign_source(src);
    124 	n->flags |= NF_COPY_MOV | NF_DONT_HOIST;
    125 
    126 	if (affcost && dst->is_sgpr() && src->is_sgpr())
    127 		coal.add_edge(src, dst, affcost);
    128 
    129 	return n;
    130 }
    131 
    132 value* shader::get_value(value_kind kind, sel_chan id,
    133                          unsigned version) {
    134 	if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count)
    135 		return val_pool[id - 1];
    136 
    137 
    138 	unsigned key = (kind << 28) | (version << 16) | id;
    139 	value_map::iterator i = reg_values.find(key);
    140 	if (i != reg_values.end()) {
    141 		return i->second;
    142 	}
    143 	value *v = create_value(kind, id, version);
    144 	reg_values.insert(std::make_pair(key, v));
    145 	return v;
    146 }
    147 
    148 value* shader::get_special_value(unsigned sv_id, unsigned version) {
    149 	sel_chan id(sv_id, 0);
    150 	return get_value(VLK_SPECIAL_REG, id, version);
    151 }
    152 
    153 void shader::fill_array_values(gpr_array *a, vvec &vv) {
    154 	unsigned sz = a->array_size;
    155 	vv.resize(sz);
    156 	for (unsigned i = 0; i < a->array_size; ++i) {
    157 		vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(),
    158 		                      false);
    159 	}
    160 }
    161 
    162 value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
    163                              unsigned version) {
    164 	sel_chan id(reg, chan);
    165 	value *v;
    166 	gpr_array *a = get_gpr_array(reg, chan);
    167 	if (rel) {
    168 		assert(a);
    169 		v = create_value(VLK_REL_REG, id, 0);
    170 		v->rel = get_special_value(SV_AR_INDEX);
    171 		fill_array_values(a, v->muse);
    172 		if (!src)
    173 			fill_array_values(a, v->mdef);
    174 	} else {
    175 		if (version == 0 && reg < prep_regs_count)
    176 			return (val_pool[id - 1]);
    177 
    178 		v = get_value(VLK_REG, id, version);
    179 	}
    180 
    181 	v->array = a;
    182 	v->pin_gpr = v->select;
    183 
    184 	return v;
    185 }
    186 
    187 value* shader::create_temp_value() {
    188 	sel_chan id(++next_temp_value_index, 0);
    189 	return get_value(VLK_TEMP, id, 0);
    190 }
    191 
    192 value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) {
    193 	return get_ro_value(kcache_values, VLK_KCACHE,
    194 			sel_chan(bank, index, chan, index_mode));
    195 }
    196 
    197 void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
    198 	if (inputs.size() <= gpr)
    199 		inputs.resize(gpr+1);
    200 
    201 	shader_input &i = inputs[gpr];
    202 	i.preloaded = preloaded;
    203 	i.comp_mask = comp_mask;
    204 
    205 	if (preloaded) {
    206 		add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
    207 	}
    208 
    209 }
    210 
    211 void shader::init() {
    212 	assert(!root);
    213 	root = create_container();
    214 }
    215 
    216 void shader::init_call_fs(cf_node* cf) {
    217 	unsigned gpr = 0;
    218 
    219 	assert(target == TARGET_LS || target == TARGET_VS || target == TARGET_ES);
    220 
    221 	for(inputs_vec::const_iterator I = inputs.begin(),
    222 			E = inputs.end(); I != E; ++I, ++gpr) {
    223 		if (!I->preloaded)
    224 			add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
    225 		else
    226 			add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true);
    227 	}
    228 }
    229 
    230 void shader::set_undef(val_set& s) {
    231 	value *undefined = get_undef_value();
    232 	if (!undefined->gvn_source)
    233 		vt.add_value(undefined);
    234 
    235 	val_set &vs = s;
    236 
    237 	for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
    238 		value *v = *I;
    239 
    240 		assert(!v->is_readonly() && !v->is_rel());
    241 
    242 		v->gvn_source = undefined->gvn_source;
    243 	}
    244 }
    245 
    246 value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) {
    247 	value *v = val_pool.create(k, regid, ver);
    248 	return v;
    249 }
    250 
    251 value* shader::get_undef_value() {
    252 	if (!undef)
    253 		undef = create_value(VLK_UNDEF, 0, 0);
    254 	return undef;
    255 }
    256 
    257 node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) {
    258 	node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags);
    259 	all_nodes.push_back(n);
    260 	return n;
    261 }
    262 
    263 alu_node* shader::create_alu() {
    264 	alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node();
    265 	all_nodes.push_back(n);
    266 	return n;
    267 }
    268 
    269 alu_group_node* shader::create_alu_group() {
    270 	alu_group_node* n =
    271 			new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
    272 	all_nodes.push_back(n);
    273 	return n;
    274 }
    275 
    276 alu_packed_node* shader::create_alu_packed() {
    277 	alu_packed_node* n =
    278 			new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
    279 	all_nodes.push_back(n);
    280 	return n;
    281 }
    282 
    283 cf_node* shader::create_cf() {
    284 	cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node();
    285 	n->bc.barrier = 1;
    286 	all_nodes.push_back(n);
    287 	return n;
    288 }
    289 
    290 fetch_node* shader::create_fetch() {
    291 	fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node();
    292 	all_nodes.push_back(n);
    293 	return n;
    294 }
    295 
    296 region_node* shader::create_region() {
    297 	region_node *n = new (pool.allocate(sizeof(region_node)))
    298 			region_node(regions.size());
    299 	regions.push_back(n);
    300 	all_nodes.push_back(n);
    301 	return n;
    302 }
    303 
    304 depart_node* shader::create_depart(region_node* target) {
    305 	depart_node* n = new (pool.allocate(sizeof(depart_node)))
    306 			depart_node(target, target->departs.size());
    307 	target->departs.push_back(n);
    308 	all_nodes.push_back(n);
    309 	return n;
    310 }
    311 
    312 repeat_node* shader::create_repeat(region_node* target) {
    313 	repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
    314 			repeat_node(target, target->repeats.size() + 1);
    315 	target->repeats.push_back(n);
    316 	all_nodes.push_back(n);
    317 	return n;
    318 }
    319 
    320 container_node* shader::create_container(node_type nt, node_subtype nst,
    321 		                                 node_flags flags) {
    322 	container_node *n = new (pool.allocate(sizeof(container_node)))
    323 			container_node(nt, nst, flags);
    324 	all_nodes.push_back(n);
    325 	return n;
    326 }
    327 
    328 if_node* shader::create_if() {
    329 	if_node* n = new (pool.allocate(sizeof(if_node))) if_node();
    330 	all_nodes.push_back(n);
    331 	return n;
    332 }
    333 
    334 bb_node* shader::create_bb(unsigned id, unsigned loop_level) {
    335 	bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level);
    336 	all_nodes.push_back(n);
    337 	return n;
    338 }
    339 
    340 value* shader::get_special_ro_value(unsigned sel) {
    341 	return get_ro_value(special_ro_values, VLK_PARAM, sel);
    342 }
    343 
    344 value* shader::get_const_value(const literal &v) {
    345 	value *val = get_ro_value(const_values, VLK_CONST, v);
    346 	val->literal_value = v;
    347 	return val;
    348 }
    349 
    350 shader::~shader() {
    351 	for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
    352 			I != E; ++I)
    353 		(*I)->~node();
    354 
    355 	for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end();
    356 			I != E; ++I) {
    357 		delete *I;
    358 	}
    359 }
    360 
    361 void shader::dump_ir() {
    362 	if (ctx.dump_pass)
    363 		dump(*this).run();
    364 }
    365 
    366 value* shader::get_value_version(value* v, unsigned ver) {
    367 	assert(!v->is_readonly() && !v->is_rel());
    368 	value *vv = get_value(v->kind, v->select, ver);
    369 	assert(vv);
    370 
    371 	if (v->array) {
    372 		vv->array = v->array;
    373 	}
    374 
    375 	return vv;
    376 }
    377 
    378 gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {
    379 
    380 	for (regarray_vec::iterator I = gpr_arrays.begin(),
    381 			E = gpr_arrays.end(); I != E; ++I) {
    382 		gpr_array* a = *I;
    383 		unsigned achan = a->base_gpr.chan();
    384 		unsigned areg = a->base_gpr.sel();
    385 		if (achan == chan && (reg >= areg && reg < areg+a->array_size))
    386 			return a;
    387 	}
    388 	return NULL;
    389 }
    390 
    391 void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
    392 					   unsigned comp_mask) {
    393 	unsigned chan = 0;
    394 	while (comp_mask) {
    395 		if (comp_mask & 1) {
    396 			gpr_array *a = new gpr_array(
    397 					sel_chan(gpr_start, chan), gpr_count);
    398 
    399 			SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
    400 			         << " [" << a->array_size << "]\n";
    401 			);
    402 
    403 			gpr_arrays.push_back(a);
    404 		}
    405 		comp_mask >>= 1;
    406 		++chan;
    407 	}
    408 }
    409 
    410 value* shader::get_pred_sel(int sel) {
    411 	assert(sel == 0 || sel == 1);
    412 	if (!pred_sels[sel])
    413 		pred_sels[sel] = get_const_value(sel);
    414 
    415 	return pred_sels[sel];
    416 }
    417 
    418 cf_node* shader::create_cf(unsigned op) {
    419 	cf_node *c = create_cf();
    420 	c->bc.set_op(op);
    421 	c->bc.barrier = 1;
    422 	return c;
    423 }
    424 
    425 std::string shader::get_full_target_name() {
    426 	std::string s = get_shader_target_name();
    427 	s += "/";
    428 	s += ctx.get_hw_chip_name();
    429 	s += "/";
    430 	s += ctx.get_hw_class_name();
    431 	return s;
    432 }
    433 
    434 const char* shader::get_shader_target_name() {
    435 	switch (target) {
    436 		case TARGET_VS: return "VS";
    437 		case TARGET_ES: return "ES";
    438 		case TARGET_PS: return "PS";
    439 		case TARGET_GS: return "GS";
    440 		case TARGET_HS: return "HS";
    441 		case TARGET_LS: return "LS";
    442 		case TARGET_COMPUTE: return "COMPUTE";
    443 		case TARGET_FETCH: return "FETCH";
    444 		default:
    445 			return "INVALID_TARGET";
    446 	}
    447 }
    448 
    449 void shader::simplify_dep_rep(node* dr) {
    450 	container_node *p = dr->parent;
    451 	if (p->is_repeat()) {
    452 		repeat_node *r = static_cast<repeat_node*>(p);
    453 		r->target->expand_repeat(r);
    454 	} else if (p->is_depart()) {
    455 		depart_node *d = static_cast<depart_node*>(p);
    456 		d->target->expand_depart(d);
    457 	}
    458 	if (dr->next)
    459 		dr->parent->cut(dr->next, NULL);
    460 }
    461 
    462 
    463 // FIXME this is used in some places as the max non-temp gpr,
    464 // (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
    465 unsigned shader::first_temp_gpr() {
    466 	return MAX_GPR - ctx.alu_temp_gprs;
    467 }
    468 
    469 unsigned shader::num_nontemp_gpr() {
    470 	return MAX_GPR - 2 * ctx.alu_temp_gprs;
    471 }
    472 
    473 void shader::set_uses_kill() {
    474 	if (root->src.empty())
    475 		root->src.resize(1);
    476 
    477 	if (!root->src[0])
    478 		root->src[0] = get_special_value(SV_VALID_MASK);
    479 }
    480 
    481 alu_node* shader::clone(alu_node* n) {
    482 	alu_node *c = create_alu();
    483 
    484 	// FIXME: this may be wrong with indirect operands
    485 	c->src = n->src;
    486 	c->dst = n->dst;
    487 
    488 	c->bc = n->bc;
    489 	c->pred = n->pred;
    490 
    491 	return c;
    492 }
    493 
    494 void shader::collect_stats(bool opt) {
    495 	if (!sb_context::dump_stat)
    496 		return;
    497 
    498 	shader_stats &s = opt ? opt_stats : src_stats;
    499 
    500 	s.shaders = 1;
    501 	s.ngpr = ngpr;
    502 	s.nstack = nstack;
    503 	s.collect(root);
    504 
    505 	if (opt)
    506 		ctx.opt_stats.accumulate(s);
    507 	else
    508 		ctx.src_stats.accumulate(s);
    509 }
    510 
    511 value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
    512 	value_map::iterator I = vm.find(key);
    513 	if (I != vm.end())
    514 		return I->second;
    515 	value *v = create_value(vk, key, 0);
    516 	v->flags = VLF_READONLY;
    517 	vm.insert(std::make_pair(key, v));
    518 	return v;
    519 }
    520 
    521 void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {
    522 
    523 	bool inside_bb = false;
    524 	bool last_inside_bb = true;
    525 	node_iterator bb_start(n->begin()), I(bb_start), E(n->end());
    526 
    527 	for (; I != E; ++I) {
    528 		node *k = *I;
    529 		inside_bb = k->type == NT_OP;
    530 
    531 		if (inside_bb && !last_inside_bb)
    532 			bb_start = I;
    533 		else if (!inside_bb) {
    534 			if (last_inside_bb
    535 					&& I->type != NT_REPEAT
    536 					&& I->type != NT_DEPART
    537 					&& I->type != NT_IF) {
    538 				bb_node *bb = create_bb(bbs.size(), loop_level);
    539 				bbs.push_back(bb);
    540 				n->insert_node_before(*bb_start, bb);
    541 				if (bb_start != I)
    542 					bb->move(bb_start, I);
    543 			}
    544 
    545 			if (k->is_container()) {
    546 
    547 				bool loop = false;
    548 				if (k->type == NT_REGION) {
    549 					loop = static_cast<region_node*>(k)->is_loop();
    550 				}
    551 
    552 				create_bbs(static_cast<container_node*>(k), bbs,
    553 				           loop_level + loop);
    554 			}
    555 		}
    556 
    557 		if (k->type == NT_DEPART)
    558 			return;
    559 
    560 		last_inside_bb = inside_bb;
    561 	}
    562 
    563 	if (last_inside_bb) {
    564 		bb_node *bb = create_bb(bbs.size(), loop_level);
    565 		bbs.push_back(bb);
    566 		if (n->empty())
    567 				n->push_back(bb);
    568 		else {
    569 			n->insert_node_before(*bb_start, bb);
    570 			if (bb_start != n->end())
    571 				bb->move(bb_start, n->end());
    572 		}
    573 	} else {
    574 		if (n->last && n->last->type == NT_IF) {
    575 			bb_node *bb = create_bb(bbs.size(), loop_level);
    576 			bbs.push_back(bb);
    577 			n->push_back(bb);
    578 		}
    579 	}
    580 }
    581 
    582 void shader::expand_bbs(bbs_vec &bbs) {
    583 
    584 	for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) {
    585 		bb_node *b = *I;
    586 		b->expand();
    587 	}
    588 }
    589 
    590 sched_queue_id shader::get_queue_id(node* n) {
    591 	switch (n->subtype) {
    592 		case NST_ALU_INST:
    593 		case NST_ALU_PACKED_INST:
    594 		case NST_COPY:
    595 		case NST_PSI:
    596 			return SQ_ALU;
    597 		case NST_FETCH_INST: {
    598 			fetch_node *f = static_cast<fetch_node*>(n);
    599 			if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
    600 				return SQ_VTX;
    601 			if (f->bc.op_ptr->flags & FF_GDS)
    602 				return SQ_GDS;
    603 			return SQ_TEX;
    604 		}
    605 		case NST_CF_INST:
    606 			return SQ_CF;
    607 		default:
    608 			assert(0);
    609 			return SQ_NUM;
    610 	}
    611 }
    612 
    613 void shader_stats::collect(node *n) {
    614 	if (n->is_alu_inst())
    615 		++alu;
    616 	else if (n->is_fetch_inst())
    617 		++fetch;
    618 	else if (n->is_container()) {
    619 		container_node *c = static_cast<container_node*>(n);
    620 
    621 		if (n->is_alu_group())
    622 			++alu_groups;
    623 		else if (n->is_alu_clause())
    624 			++alu_clauses;
    625 		else if (n->is_fetch_clause())
    626 			++fetch_clauses;
    627 		else if (n->is_cf_inst())
    628 			++cf;
    629 
    630 		if (!c->empty()) {
    631 			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
    632 				collect(*I);
    633 			}
    634 		}
    635 	}
    636 }
    637 
    638 void shader_stats::accumulate(shader_stats& s) {
    639 	++shaders;
    640 	ndw += s.ndw;
    641 	ngpr += s.ngpr;
    642 	nstack += s.nstack;
    643 
    644 	alu += s.alu;
    645 	alu_groups += s.alu_groups;
    646 	alu_clauses += s.alu_clauses;
    647 	fetch += s.fetch;
    648 	fetch_clauses += s.fetch_clauses;
    649 	cf += s.cf;
    650 }
    651 
    652 void shader_stats::dump() {
    653 	sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
    654 			<< ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
    655 			<< ", alu:" << alu << ", fetch:" << fetch
    656 			<< ", fetch clauses:" << fetch_clauses
    657 			<< ", cf:" << cf;
    658 
    659 	if (shaders > 1)
    660 		sblog << ", shaders:" << shaders;
    661 
    662 	sblog << "\n";
    663 }
    664 
    665 static void print_diff(unsigned d1, unsigned d2) {
    666 	if (d1)
    667 		sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
    668 	else if (d2)
    669 		sblog << "N/A";
    670 	else
    671 		sblog << "0%";
    672 }
    673 
    674 void shader_stats::dump_diff(shader_stats& s) {
    675 	sblog << "dw:"; print_diff(ndw, s.ndw);
    676 	sblog << ", gpr:" ; print_diff(ngpr, s.ngpr);
    677 	sblog << ", stk:" ; print_diff(nstack, s.nstack);
    678 	sblog << ", alu groups:" ; print_diff(alu_groups, s.alu_groups);
    679 	sblog << ", alu clauses: " ; print_diff(alu_clauses, s.alu_clauses);
    680 	sblog << ", alu:" ; print_diff(alu, s.alu);
    681 	sblog << ", fetch:" ; print_diff(fetch, s.fetch);
    682 	sblog << ", fetch clauses:" ; print_diff(fetch_clauses, s.fetch_clauses);
    683 	sblog << ", cf:" ; print_diff(cf, s.cf);
    684 	sblog << "\n";
    685 }
    686 
    687 } // namespace r600_sb
    688