/*
 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

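// Workaround for a relative-addressing hazard on R6xx: pad with an ALU
// group containing a single NOP, inserted before group 'b4', so that an
// AR load does not directly follow a group that wrote the GPR it indexes
// (see the r6xx_gpr_index_workaround check in finalize_alu_group()).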
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

	alu_group_node *g = sh.create_alu_group();
	alu_node *a = sh.create_alu();

	a->bc.set_op(ALU_OP0_NOP);
	a->bc.last = 1;

	g->push_back(a);
	b4->insert_before(g);
}

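// Entry point of the pass: finalizes all regions (loops and if/else
// constructs) in reverse order, runs the CF peephole, applies the
// chip-specific fixups (trailing NOPs, CF_END vs. end_of_program),
// promotes the last export of each type to EXPORT_DONE, and publishes
// the final GPR and stack usage to the shader.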
int bc_finalizer::run() {

	run_on(sh.root);

	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	if (!ctx.is_cayman() && (last_cf->bc.op_ptr->flags & CF_ALU)) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}

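// Wraps a loop region with LOOP_START_DX10/LOOP_END, emits LOOP_BREAK for
// every depart and LOOP_CONTINUE for every repeat (except a repeat that is
// the first child of the loop itself, where falling through to LOOP_END
// continues implicitly), and accounts for the loop's stack usage.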
void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	// Update last_cf, but don't overwrite it if it's outside the current loop nest since
	// it may point to a cf that is later in program order.
	// The single parent level check is sufficient since finalize_loop() is processed in
	// reverse order from innermost to outermost loop nest level.
	if (!last_cf || last_cf->get_parent_region() == r) {
		last_cf = loop_end;
	}

	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

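// Lowers an if/else region to JUMP/ELSE/POP control flow. The expected
// region structure is documented in the comment below; JUMP branches to
// the ELSE clause (or past the POP when there is no else branch), and
// both ELSE and the final POP pop one stack entry.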
void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional else code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		if (!last_cf || last_cf->get_parent_region() == r) {
			last_cf = if_pop;
		}
		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		bool has_else = n_if->next;

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

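// Recursively walks the program tree and dispatches each node to the
// appropriate finalizer. For ALU_PUSH_BEFORE clauses on Evergreen/Cayman
// it also decides whether the chip-specific stack workaround applies and
// marks the clause with NF_ALU_STACK_WORKAROUND (consumed by cf_peephole()).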
void bc_finalizer::run_on(container_node* c) {
	node *prev_node = NULL;
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
		prev_node = n;
	}
}

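// Finalizes one ALU instruction group: resolves destination operands to
// their final GPR sel/chan, sets write_mask, relative-addressing and
// predicate fields, finalizes sources via finalize_alu_src(), and marks
// the group's last instruction. If a source is relatively addressed and
// clashes with a write in the previous group, the R6xx NOP padding group
// is inserted before this one.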
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

	alu_node *last = NULL;
	alu_group_node *prev_g = NULL;
	bool add_nop = false;
	if (prev_node && prev_node->is_alu_group()) {
		prev_g = static_cast<alu_group_node*>(prev_node);
	}

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;
		value *d = n->dst.empty() ? NULL : n->dst[0];

		if (d && d->is_special_reg()) {
			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
			n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		add_nop |= finalize_alu_src(g, n, prev_g);

		last = n;
	}

	if (add_nop) {
		if (sh.get_ctx().r6xx_gpr_index_workaround) {
			insert_rv6xx_load_ar_workaround(g);
		}
	}
	last->bc.last = 1;
}

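// Translates the source operands of one ALU instruction: relative and
// plain GPRs get their final sel/chan, inline constants are matched to
// hardware constant selects (ALU_SRC_0, ALU_SRC_1, ...) or placed in the
// group's literal slots, and kcache accesses are rewritten via
// translate_kcache(). Returns true if the previous group wrote a GPR
// that is relatively addressed here, i.e. the R6xx NOP padding is needed.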
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
	vvec &sv = a->src;
	bool add_nop = false;
	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
				if (prev && !add_nop) {
					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
						alu_node *pn = static_cast<alu_node*>(*pI);
						if (pn->bc.dst_gpr == src.sel) {
							add_nop = true;
							break;
						}
					}
				}
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
		if (prev && !add_nop) {
			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
				alu_node *pn = static_cast<alu_node*>(*pI);
				if (pn->bc.dst_rel) {
					if (pn->bc.dst_gpr == src.sel) {
						add_nop = true;
						break;
					}
				}
			}
		}
	}

	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
	return add_nop;
}

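// Fills the source channel selects of fetch instruction 'dst' from four
// operands of 'src' starting at 'arg_start'. Constants must be 0 or 1.0f
// (mapped to SEL_0/SEL_1), undefined channels become SEL_MASK, and all
// GPR operands must live in the same register, which becomes src_gpr.
// All destination channels of 'dst' are masked.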
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
	int reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {

		dst.bc.dst_sel[chan] = SEL_MASK;

		unsigned sel = SEL_MASK;

		value *v = src.src[arg_start + chan];

		if (!v || v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(&src);
			sblog << "\n";
			abort();
		}

		dst.bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

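// Splits the explicit-gradient arguments of a USEGRAD texture fetch into
// the separate SET_GRADIENTS_V / SET_GRADIENTS_H instructions the hardware
// expects: sources 4-7 hold the vertical gradient, sources 8-11 the
// horizontal one; both new fetches are inserted before the original.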
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12 || f->src.size() == 13);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		arg_start += 4;

		copy_fetch_src(*n, *f, arg_start);

		f->insert_before(n);
	}
}

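// Emits a SET_TEXTURE_OFFSETS fetch before 'f', taking the offset values
// from sources 4-7 of the original instruction.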
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
	assert(f.src.size() == 8);

	fetch_node *n = sh.create_fetch();

	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

	copy_fetch_src(*n, f, 4);

	f.insert_before(n);
}

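// Finalizes source and destination operands of a fetch instruction.
// Vertex fetches read a single source channel; gradient/offset texture
// fetches first get their auxiliary SET_* instructions emitted. All
// source GPR channels must come from one register (src_gpr), and the
// destination swizzle is rebuilt so that dst_sel[chan] names the output
// component written to GPR channel 'chan'.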
void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}
	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

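// Finalizes a CF instruction. Exports are rewritten to CF_OP_EXPORT and
// recorded per type so run() can promote the last one to EXPORT_DONE;
// memory exports get rw_gpr and comp_mask, plus index_gpr for indexed
// (bc.type & 1) RAT/memory writes; calls reserve extra stack space
// (two elements on 16-lane wavefronts, one otherwise).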
void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}

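// Maps a kcache constant (bank + address) to its ALU source select using
// the clause's kcache lock set. Each of the four lock slots k maps a
// window of constants to the fixed select base kc_base[k]; a KC_LOCK_2
// slot covers two 16-constant lines. Worked example: if kc[1] locks bank
// B at line 2 (kc.addr == 2), a constant at sel 35 in bank B (line 2,
// offset 3) translates to 160 + (35 - 32) = 163.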
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.kcache_sel();
	unsigned bank = v->select.kcache_bank();
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

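// Tracks the highest GPR index used so far; the alu_temp_gprs at the top
// of the register file are excluded from the count.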
void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}

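// Computes the number of stack elements needed at node 'n' by walking up
// the enclosing regions: each loop costs ctx.stack_entry_size elements,
// each if costs one. Chip-specific reservations are then added on top,
// as explained in the switch below. 'add' seeds the count with extra
// elements (e.g. for a PUSH issued at this point).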
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                       unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved.
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) a non-WQM push is used with WQM/LOOP frames on the stack
		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
		// NOTE:
		// It was found that the conditions above are not sufficient; there are
		// other cases where we also need to reserve stack space, which is why
		// we always reserve 1 stack element if we have a non-WQM push on the
		// stack. Condition 2 is ignored for now because we don't use this
		// instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}

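// Converts the element count for region 'r' (plus 'add' extra elements)
// into 4-element stack entries and raises the shader's stack requirement
// if needed.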
void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as the entry
	// size, not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;

	if (nstack < stack_entries)
		nstack = stack_entries;
}

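// Final CF cleanup: materializes an explicit PUSH before clauses marked
// with NF_ALU_STACK_WORKAROUND, resolves jump_after targets to the
// following instruction, folds a POP into a preceding CF_OP_ALU clause
// (ALU_POP_AFTER), and removes JUMPs whose target is the very next node.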
void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb
    959