/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

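/* Insert a NOP ALU group before group b4. finalize_alu_src() requests this
 * padding when a group reads a GPR via relative (AR-indexed) addressing that
 * the immediately preceding group wrote — presumably a hardware hazard on
 * rv6xx (see r6xx_gpr_index_workaround).
 */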
void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

	alu_group_node *g = sh.create_alu_group();
	alu_node *a = sh.create_alu();

	a->bc.set_op(ALU_OP0_NOP);
	a->bc.last = 1;

	g->push_back(a);
	b4->insert_before(g);
}

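/* Main entry point: finalize all instructions, lower structured control flow
 * regions (loops and ifs) to CF instructions, apply target-specific fixups,
 * and record the final GPR and stack usage in the shader.
 */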
int bc_finalizer::run() {

	run_on(sh.root);

	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx:
	// add an ALU NOP clause and a final CF NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	if (!ctx.is_cayman() && (last_cf->bc.op_ptr->flags & CF_ALU)) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}

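/* Wrap a loop region with a LOOP_START_DX10/LOOP_END pair, emit a LOOP_BREAK
 * for each depart and a LOOP_CONTINUE for each repeat, and link the jump
 * addresses of the start/end pair to each other.
 */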
void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	// Update last_cf, but don't overwrite it if it's outside the current loop
	// nest, since it may point to a cf that is later in program order.
	// The single parent level check is sufficient because finalize_loop()
	// processes loops in reverse order, from the innermost to the outermost
	// nest level.
	if (!last_cf || last_cf->get_parent_region() == r) {
		last_cf = loop_end;
	}

	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME: this produces an unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		if (!last_cf || last_cf->get_parent_region() == r) {
			last_cf = if_pop;
		}
		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		/* the depart/repeat 1 is actually part of the "else" code.
		 * if it's a depart for an outer loop region it will want to
		 * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need
		 * to emit the else clause.
		 */
		bool has_else = n_if->next;

		if (repdep1->is_depart()) {
			depart_node *dep1 = static_cast<depart_node*>(repdep1);
			if (dep1->target != r && dep1->target->is_loop())
				has_else = true;
		}

		if (repdep1->is_repeat()) {
			repeat_node *rep1 = static_cast<repeat_node*>(repdep1);
			if (rep1->target != r && rep1->target->is_loop())
				has_else = true;
		}

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

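/* Recursively finalize all instructions in a container. ALU groups are given
 * the previous node so the rv6xx relative-addressing hazard can be detected;
 * ALU_PUSH_BEFORE clauses may be flagged here for the 8xx/9xx stack
 * workarounds, which cf_peephole() applies later.
 */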
void bc_finalizer::run_on(container_node* c) {
	node *prev_node = NULL;
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
				last_cf = c;
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
		prev_node = n;
	}
}

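/* Translate the destination of each instruction in the group to its final
 * GPR/chan encoding, set the write_mask/last bits, finalize all sources, and
 * insert the rv6xx workaround NOP group if any source required it.
 */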
void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

	alu_node *last = NULL;
	alu_group_node *prev_g = NULL;
	bool add_nop = false;
	if (prev_node && prev_node->is_alu_group()) {
		prev_g = static_cast<alu_group_node*>(prev_node);
	}

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;
		value *d = n->dst.empty() ? NULL : n->dst[0];

		if (d && d->is_special_reg()) {
			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access());
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
			n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		add_nop |= finalize_alu_src(g, n, prev_g);

		last = n;
	}

	if (add_nop) {
		if (sh.get_ctx().r6xx_gpr_index_workaround) {
			insert_rv6xx_load_ar_workaround(g);
		}
	}
	last->bc.last = 1;
}

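/* Encode the source operands of one ALU instruction. Returns true if a NOP
 * group must be inserted before this group (rv6xx workaround): either the
 * previous group wrote a GPR that this instruction reads via relative
 * addressing, or it wrote with relative addressing to a GPR this
 * instruction reads.
 */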
bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) {
	vvec &sv = a->src;
	bool add_nop = false;
	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
				if (prev && !add_nop) {
					for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
						alu_node *pn = static_cast<alu_node*>(*pI);
						if (pn->bc.dst_gpr == src.sel) {
							add_nop = true;
							break;
						}
					}
				}
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_SPECIAL_REG:
			if (v->select.sel() == SV_LDS_OQA) {
				src.sel = ALU_SRC_LDS_OQ_A_POP;
				src.chan = 0;
			} else if (v->select.sel() == SV_LDS_OQB) {
				src.sel = ALU_SRC_LDS_OQ_B_POP;
				src.chan = 0;
			} else {
				src.sel = ALU_SRC_0;
				src.chan = 0;
			}
			break;
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
		if (prev && !add_nop) {
			for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) {
				alu_node *pn = static_cast<alu_node*>(*pI);
				if (pn->bc.dst_rel) {
					if (pn->bc.dst_gpr == src.sel) {
						add_nop = true;
						break;
					}
				}
			}
		}
	}

	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
	return add_nop;
}

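/* Copy four consecutive source operands (starting at arg_start) from src
 * into the src_sel fields of the auxiliary fetch instruction dst. All GPR
 * operands must live in the same register; constants other than 0 and 1.0
 * cannot be encoded and abort.
 */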
void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
	int reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {

		dst.bc.dst_sel[chan] = SEL_MASK;

		unsigned sel = SEL_MASK;

		value *v = src.src[arg_start + chan];

		if (!v || v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(&src);
			sblog << "\n";
			abort();
		}

		dst.bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

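/* A fetch with gradients carries them as extra source operands: slots 4..7
 * hold the vertical gradient and slots 8..11 the horizontal one. Emit
 * SET_GRADIENTS_V/H instructions before the fetch to load them.
 */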
void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12 || f->src.size() == 13);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		arg_start += 4;

		copy_fetch_src(*n, *f, arg_start);

		f->insert_before(n);
	}
}

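/* Texture offsets are passed as source operands 4..7; emit a
 * SET_TEXTURE_OFFSETS instruction before the fetch to load them.
 */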
void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
	assert(f.src.size() == 8);

	fetch_node *n = sh.create_fetch();

	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

	copy_fetch_src(*n, f, 4);

	f.insert_before(n);
}

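/* Encode the source and destination operands of a fetch instruction. All
 * sources must come from a single GPR; the destination swizzle is rebuilt so
 * that each fetched component lands in its allocated channel.
 */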
void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_GDS) {
		src_count = 2;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}
	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	if ((flags & FF_GDS) && reg == -1) {
		f->bc.dst_sel[0] = SEL_MASK;
		f->bc.dst_gpr = 0;
		return;
	}
	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

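/* Encode the operands of a CF instruction. Exports are initially emitted as
 * plain EXPORT and tracked in last_export; run() later converts the last
 * export of each type to EXPORT_DONE. MEM instructions additionally encode a
 * component mask and, when the write is indexed (bc.type & 1), an index GPR.
 */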
void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}

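/* Translate a kcache constant reference to its final sel/chan encoding. An
 * ALU clause has up to four kcache lock entries (bc.kc[]); the constants
 * locked by entry k are addressed starting at kc_base[k] (128/160/256/288),
 * so the final sel is that base plus the constant's offset within the locked
 * line(s).
 */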
sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.kcache_sel();
	unsigned bank = v->select.kcache_bank();
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

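// The top alu_temp_gprs registers are reserved as ALU temporaries and are
// never counted in ngpr.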
void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}

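/* Compute the number of stack elements needed at node n: 'add' extra
 * elements plus the contribution of each enclosing region
 * (stack_entry_size per loop, one per if), with hardware-specific
 * reservations applied below.
 */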
unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
                                           unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved.
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) a non-WQM push is used with WQM/LOOP frames on the stack
		//   2) ALU_ELSE_AFTER is used at the point of maximum stack usage
		// NOTE:
		// The conditions above were found to be insufficient: there are other
		// cases where we also need to reserve stack space, which is why we
		// always reserve 1 stack element if we have a non-WQM push on the
		// stack. Condition 2 is ignored for now because we don't use this
		// instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}

void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as the entry
	// size, not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;

	if (nstack < stack_entries)
		nstack = stack_entries;
}

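/* CF-level peephole fixups: split flagged ALU_PUSH_BEFORE clauses into an
 * explicit PUSH + ALU pair (stack workarounds), resolve jump_after targets
 * to their real successor nodes, fold a POP into a preceding ALU clause as
 * ALU_POP_AFTER, and delete jumps to the immediately following node.
 */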
void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			if (c->jump_target->next == NULL) {
				c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
				if (last_cf == c->jump_target)
					last_cf = static_cast<cf_node*>(c->jump_target->next);
			}
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb