Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #include "sb_bc.h"
     28 #include "sb_shader.h"
     29 #include "sb_pass.h"
     30 #include "eg_sq.h" // V_SQ_CF_INDEX_0/1
     31 
     32 namespace r600_sb {
     33 
     34 static const char* chans = "xyzw01?_";
     35 
     36 static const char* vec_bs[] = {
     37 		"VEC_012", "VEC_021", "VEC_120", "VEC_102", "VEC_201", "VEC_210"
     38 };
     39 
     40 static const char* scl_bs[] = {
     41 		"SCL_210", "SCL_122", "SCL_212", "SCL_221"
     42 };
     43 
     44 
     45 bool bc_dump::visit(cf_node& n, bool enter) {
     46 	if (enter) {
     47 
     48 		id = n.bc.id << 1;
     49 
     50 		if ((n.bc.op_ptr->flags & CF_ALU) && n.bc.is_alu_extended()) {
     51 			dump_dw(id, 2);
     52 			id += 2;
     53 			sblog << "\n";
     54 		}
     55 
     56 		dump_dw(id, 2);
     57 		dump(n);
     58 
     59 		if (n.bc.op_ptr->flags & CF_CLAUSE) {
     60 			id = n.bc.addr << 1;
     61 			new_group = 1;
     62 		}
     63 	}
     64 	return true;
     65 }
     66 
     67 bool bc_dump::visit(alu_node& n, bool enter) {
     68 	if (enter) {
     69 		sblog << " ";
     70 		dump_dw(id, 2);
     71 
     72 		if (new_group) {
     73 			sblog.print_w(++group_index, 5);
     74 			sblog << " ";
     75 		} else
     76 			sblog << "      ";
     77 
     78 		dump(n);
     79 		id += 2;
     80 
     81 		new_group = n.bc.last;
     82 	} else {
     83 		if (n.bc.last) {
     84 			alu_group_node *g =
     85 					static_cast<alu_group_node*>(n.get_alu_group_node());
     86 			assert(g);
     87 			for (unsigned k = 0; k < g->literals.size(); ++k) {
     88 				sblog << " ";
     89 				dump_dw(id, 1);
     90 				id += 1;
     91 				sblog << "\n";
     92 			}
     93 
     94 			id = (id + 1) & ~1u;
     95 		}
     96 	}
     97 
     98 	return false;
     99 }
    100 
    101 bool bc_dump::visit(fetch_node& n, bool enter) {
    102 	if (enter) {
    103 		sblog << " ";
    104 		dump_dw(id, 3);
    105 		dump(n);
    106 		id += 4;
    107 	}
    108 	return false;
    109 }
    110 
    111 static void fill_to(sb_ostringstream &s, int pos) {
    112 	int l = s.str().length();
    113 	if (l < pos)
    114 		s << std::string(pos-l, ' ');
    115 }
    116 
    117 void bc_dump::dump(cf_node& n) {
    118 	sb_ostringstream s;
    119 	s << n.bc.op_ptr->name;
    120 
    121 	if (n.bc.op_ptr->flags & CF_EXP) {
    122 		static const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
    123 
    124 		fill_to(s, 18);
    125 		s << " " << exp_type[n.bc.type] << " ";
    126 
    127 		if (n.bc.burst_count) {
    128 			sb_ostringstream s2;
    129 			s2 << n.bc.array_base << "-" << n.bc.array_base + n.bc.burst_count;
    130 			s.print_wl(s2.str(), 5);
    131 			s << " R" << n.bc.rw_gpr << "-" <<
    132 					n.bc.rw_gpr + n.bc.burst_count << ".";
    133 		} else {
    134 			s.print_wl(n.bc.array_base, 5);
    135 			s << " R" << n.bc.rw_gpr << ".";
    136 		}
    137 
    138 		for (int k = 0; k < 4; ++k)
    139 			s << chans[n.bc.sel[k]];
    140 
    141 	} else if (n.bc.op_ptr->flags & CF_MEM) {
    142 		static const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
    143 				"WRITE_IND_ACK"};
    144 		fill_to(s, 18);
    145 		s << " " << exp_type[n.bc.type] << " ";
    146 		s.print_wl(n.bc.array_base, 5);
    147 		s << " R" << n.bc.rw_gpr << ".";
    148 		for (int k = 0; k < 4; ++k)
    149 			s << ((n.bc.comp_mask & (1 << k)) ? chans[k] : '_');
    150 
    151 		if ((n.bc.op_ptr->flags & CF_RAT) && (n.bc.type & 1)) {
    152 			s << ", @R" << n.bc.index_gpr << ".xyz";
    153 		}
    154 		if ((n.bc.op_ptr->flags & CF_MEM) && (n.bc.type & 1)) {
    155 			s << ", @R" << n.bc.index_gpr << ".x";
    156 		}
    157 
    158 		s << "  ES:" << n.bc.elem_size;
    159 
    160 	} else {
    161 
    162 		if (n.bc.op_ptr->flags & CF_CLAUSE) {
    163 			s << " " << n.bc.count+1;
    164 		}
    165 
    166 		s << " @" << (n.bc.addr << 1);
    167 
    168 		if (n.bc.op_ptr->flags & CF_ALU) {
    169 			static const char *index_mode[] = {"", " CF_INDEX_0", " CF_INDEX_1"};
    170 
    171 			for (int k = 0; k < 4; ++k) {
    172 				bc_kcache &kc = n.bc.kc[k];
    173 				if (kc.mode) {
    174 					s << " KC" << k << "[CB" << kc.bank << ":" <<
    175 							(kc.addr << 4) << "-" <<
    176 							(((kc.addr + kc.mode) << 4) - 1) << index_mode[kc.index_mode] << "]";
    177 				}
    178 			}
    179 		}
    180 
    181 		if (n.bc.cond)
    182 			s << " CND:" << n.bc.cond;
    183 
    184 		if (n.bc.pop_count)
    185 			s << " POP:" << n.bc.pop_count;
    186 
    187 		if (n.bc.count && (n.bc.op_ptr->flags & CF_EMIT))
    188 			s << " STREAM" << n.bc.count;
    189 	}
    190 
    191 	if (!n.bc.barrier)
    192 		s << "  NO_BARRIER";
    193 
    194 	if (n.bc.valid_pixel_mode)
    195 		s << "  VPM";
    196 
    197 	if (n.bc.whole_quad_mode)
    198 		s << "  WQM";
    199 
    200 	if (n.bc.end_of_program)
    201 		s << "  EOP";
    202 
    203 	sblog << s.str() << "\n";
    204 }
    205 
    206 
    207 static void print_sel(sb_ostream &s, int sel, int rel, int index_mode,
    208                       int need_brackets) {
    209 	if (rel && index_mode >= 5 && sel < 128)
    210 		s << "G";
    211 	if (rel || need_brackets) {
    212 		s << "[";
    213 	}
    214 	s << sel;
    215 	if (rel) {
    216 		if (index_mode == 0 || index_mode == 6)
    217 			s << "+AR";
    218 		else if (index_mode == 4)
    219 			s << "+AL";
    220 	}
    221 	if (rel || need_brackets) {
    222 		s << "]";
    223 	}
    224 }
    225 
    226 static void print_dst(sb_ostream &s, bc_alu &alu)
    227 {
    228 	unsigned sel = alu.dst_gpr;
    229 	char reg_char = 'R';
    230 	if (sel >= 128 - 4) { // clause temporary gpr
    231 		sel -= 128 - 4;
    232 		reg_char = 'T';
    233 	}
    234 
    235 	if (alu.write_mask || (alu.op_ptr->src_count == 3 && alu.op < LDS_OP2_LDS_ADD)) {
    236 		s << reg_char;
    237 		print_sel(s, sel, alu.dst_rel, alu.index_mode, 0);
    238 	} else {
    239 		s << "__";
    240 	}
    241 	s << ".";
    242 	s << chans[alu.dst_chan];
    243 }
    244 
    245 static void print_src(sb_ostream &s, bc_alu &alu, unsigned idx)
    246 {
    247 	bc_alu_src *src = &alu.src[idx];
    248 	unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;
    249 
    250 	if (src->neg)
    251 		s <<"-";
    252 	if (src->abs)
    253 		s <<"|";
    254 
    255 	if (sel < 128 - 4) {
    256 		s << "R";
    257 	} else if (sel < 128) {
    258 		s << "T";
    259 		sel -= 128 - 4;
    260 	} else if (sel < 160) {
    261 		s << "KC0";
    262 		need_brackets = 1;
    263 		sel -= 128;
    264 	} else if (sel < 192) {
    265 		s << "KC1";
    266 		need_brackets = 1;
    267 		sel -= 160;
    268 	} else if (sel >= 448) {
    269 		s << "Param";
    270 		sel -= 448;
    271 	} else if (sel >= 288) {
    272 		s << "KC3";
    273 		need_brackets = 1;
    274 		sel -= 288;
    275 	} else if (sel >= 256) {
    276 		s << "KC2";
    277 		need_brackets = 1;
    278 		sel -= 256;
    279 	} else {
    280 		need_sel = 0;
    281 		need_chan = 0;
    282 		switch (sel) {
    283 		case ALU_SRC_LDS_OQ_A:
    284 			s << "LDS_OQ_A";
    285 			need_chan = 1;
    286 			break;
    287 		case ALU_SRC_LDS_OQ_B:
    288 			s << "LDS_OQ_B";
    289 			need_chan = 1;
    290 			break;
    291 		case ALU_SRC_LDS_OQ_A_POP:
    292 			s << "LDS_OQ_A_POP";
    293 			need_chan = 1;
    294 			break;
    295 		case ALU_SRC_LDS_OQ_B_POP:
    296 			s << "LDS_OQ_B_POP";
    297 			need_chan = 1;
    298 			break;
    299 		case ALU_SRC_LDS_DIRECT_A:
    300 			s << "LDS_A["; s.print_zw_hex(src->value.u, 8); s << "]";
    301 			break;
    302 		case ALU_SRC_LDS_DIRECT_B:
    303 			s << "LDS_B["; s.print_zw_hex(src->value.u, 8); s << "]";
    304 			break;
    305 		case ALU_SRC_PS:
    306 			s << "PS";
    307 			break;
    308 		case ALU_SRC_PV:
    309 			s << "PV";
    310 			need_chan = 1;
    311 			break;
    312 		case ALU_SRC_LITERAL:
    313 			s << "[0x";
    314 			s.print_zw_hex(src->value.u, 8);
    315 			s << " " << src->value.f << "]";
    316 			need_chan = 1;
    317 			break;
    318 		case ALU_SRC_0_5:
    319 			s << "0.5";
    320 			break;
    321 		case ALU_SRC_M_1_INT:
    322 			s << "-1";
    323 			break;
    324 		case ALU_SRC_1_INT:
    325 			s << "1";
    326 			break;
    327 		case ALU_SRC_1:
    328 			s << "1.0";
    329 			break;
    330 		case ALU_SRC_0:
    331 			s << "0";
    332 			break;
    333 		default:
    334 			s << "??IMM_" <<  sel;
    335 			break;
    336 		}
    337 	}
    338 
    339 	if (need_sel)
    340 		print_sel(s, sel, src->rel, alu.index_mode, need_brackets);
    341 
    342 	if (need_chan) {
    343 		s << "." << chans[src->chan];
    344 	}
    345 
    346 	if (src->abs)
    347 		s << "|";
    348 }
    349 void bc_dump::dump(alu_node& n) {
    350 	sb_ostringstream s;
    351 	static const char *omod_str[] = {"","*2","*4","/2"};
    352 	static const char *slots = "xyzwt";
    353 
    354 	s << (n.bc.update_exec_mask ? "M" : " ");
    355 	s << (n.bc.update_pred ? "P" : " ");
    356 	s << " ";
    357 	s << (n.bc.pred_sel>=2 ? (n.bc.pred_sel == 2 ? "0" : "1") : " ");
    358 	s << " ";
    359 
    360 	s << slots[n.bc.slot] << ": ";
    361 
    362 	s << n.bc.op_ptr->name << omod_str[n.bc.omod] << (n.bc.clamp ? "_sat" : "");
    363 	fill_to(s, 26);
    364 	s << " ";
    365 
    366 	print_dst(s, n.bc);
    367 	for (int k = 0; k < n.bc.op_ptr->src_count; ++k) {
    368 		s << (k ? ", " : ",  ");
    369 		print_src(s, n.bc, k);
    370 	}
    371 
    372 	if (n.bc.bank_swizzle) {
    373 		fill_to(s, 55);
    374 		if (n.bc.slot == SLOT_TRANS)
    375 			s << "  " << scl_bs[n.bc.bank_swizzle];
    376 		else
    377 			s << "  " << vec_bs[n.bc.bank_swizzle];
    378 	}
    379 
    380 	if (ctx.is_cayman()) {
    381 		if (n.bc.op == ALU_OP1_MOVA_INT) {
    382 			static const char *mova_str[] = { " AR_X", " PC", " CF_IDX0", " CF_IDX1",
    383 				" Unknown MOVA_INT dest" };
    384 			s << mova_str[std::min(n.bc.dst_gpr, 4u)];  // CM_V_SQ_MOVA_DST_AR_*
    385 		}
    386 	}
    387 
    388 	if (n.bc.lds_idx_offset) {
    389 		s << " IDX_OFFSET:" << n.bc.lds_idx_offset;
    390 	}
    391 
    392 	sblog << s.str() << "\n";
    393 }
    394 
    395 int bc_dump::init() {
    396 	sb_ostringstream s;
    397 	s << "===== SHADER #" << sh.id;
    398 
    399 	if (sh.optimized)
    400 		s << " OPT";
    401 
    402 	s << " ";
    403 
    404 	std::string target = std::string(" ") +
    405 			sh.get_full_target_name() + " =====";
    406 
    407 	while (s.str().length() + target.length() < 80)
    408 		s << "=";
    409 
    410 	s << target;
    411 
    412 	sblog << "\n" << s.str() << "\n";
    413 
    414 	s.clear();
    415 
    416 	if (bc_data) {
    417 		s << "===== " << ndw << " dw ===== " << sh.ngpr
    418 				<< " gprs ===== " << sh.nstack << " stack ";
    419 	}
    420 
    421 	while (s.str().length() < 80)
    422 		s << "=";
    423 
    424 	sblog << s.str() << "\n";
    425 
    426 	return 0;
    427 }
    428 
    429 int bc_dump::done() {
    430 	sb_ostringstream s;
    431 	s << "===== SHADER_END ";
    432 
    433 	while (s.str().length() < 80)
    434 		s << "=";
    435 
    436 	sblog << s.str() << "\n\n";
    437 
    438 	return 0;
    439 }
    440 
    441 bc_dump::bc_dump(shader& s, bytecode* bc)  :
    442 	vpass(s), bc_data(), ndw(), id(),
    443 	new_group(), group_index() {
    444 
    445 	if (bc) {
    446 		bc_data = bc->data();
    447 		ndw = bc->ndw();
    448 	}
    449 }
    450 
    451 void bc_dump::dump(fetch_node& n) {
    452 	sb_ostringstream s;
    453 	static const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
    454 	unsigned gds = n.bc.op_ptr->flags & FF_GDS;
    455 	bool gds_has_ret = gds && n.bc.op >= FETCH_OP_GDS_ADD_RET &&
    456 		n.bc.op <= FETCH_OP_GDS_USHORT_READ_RET;
    457 	bool show_dst = !gds || (gds && gds_has_ret);
    458 
    459 	s << n.bc.op_ptr->name;
    460 	fill_to(s, 20);
    461 
    462 	if (show_dst) {
    463 		s << "R";
    464 		print_sel(s, n.bc.dst_gpr, n.bc.dst_rel, INDEX_LOOP, 0);
    465 		s << ".";
    466 		for (int k = 0; k < 4; ++k)
    467 			s << chans[n.bc.dst_sel[k]];
    468 		s << ", ";
    469 	}
    470 
    471 	s << "R";
    472 	print_sel(s, n.bc.src_gpr, n.bc.src_rel, INDEX_LOOP, 0);
    473 	s << ".";
    474 
    475 	unsigned vtx = n.bc.op_ptr->flags & FF_VTX;
    476 	unsigned num_src_comp = gds ? 3 : vtx ? ctx.is_cayman() ? 2 : 1 : 4;
    477 
    478 	for (unsigned k = 0; k < num_src_comp; ++k)
    479 		s << chans[n.bc.src_sel[k]];
    480 
    481 	if (vtx && n.bc.offset[0]) {
    482 		s << " + " << n.bc.offset[0] << "b ";
    483 	}
    484 
    485 	if (!gds)
    486 		s << ",   RID:" << n.bc.resource_id;
    487 
    488 	if (gds) {
    489 		s << " UAV:" << n.bc.uav_id;
    490 		if (n.bc.uav_index_mode)
    491 			s << " UAV:SQ_CF_INDEX_" << (n.bc.uav_index_mode - V_SQ_CF_INDEX_0);
    492 		if (n.bc.bcast_first_req)
    493 			s << " BFQ";
    494 		if (n.bc.alloc_consume)
    495 			s << " AC";
    496 	} else if (vtx) {
    497 		s << "  " << fetch_type[n.bc.fetch_type];
    498 		if (!ctx.is_cayman() && n.bc.mega_fetch_count)
    499 			s << " MFC:" << n.bc.mega_fetch_count;
    500 		if (n.bc.fetch_whole_quad)
    501 			s << " FWQ";
    502 		if (ctx.is_egcm() && n.bc.resource_index_mode)
    503 			s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
    504 		if (ctx.is_egcm() && n.bc.sampler_index_mode)
    505 			s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
    506 
    507 		s << " UCF:" << n.bc.use_const_fields
    508 				<< " FMT(DTA:" << n.bc.data_format
    509 				<< " NUM:" << n.bc.num_format_all
    510 				<< " COMP:" << n.bc.format_comp_all
    511 				<< " MODE:" << n.bc.srf_mode_all << ")";
    512 	} else {
    513 		s << ", SID:" << n.bc.sampler_id;
    514 		if (n.bc.lod_bias)
    515 			s << " LB:" << n.bc.lod_bias;
    516 		s << " CT:";
    517 		for (unsigned k = 0; k < 4; ++k)
    518 			s << (n.bc.coord_type[k] ? "N" : "U");
    519 		for (unsigned k = 0; k < 3; ++k)
    520 			if (n.bc.offset[k])
    521 				s << " O" << chans[k] << ":" << n.bc.offset[k];
    522 		if (ctx.is_egcm() && n.bc.resource_index_mode)
    523 			s << " RIM:SQ_CF_INDEX_" << (n.bc.resource_index_mode - V_SQ_CF_INDEX_0);
    524 		if (ctx.is_egcm() && n.bc.sampler_index_mode)
    525 			s << " SID:SQ_CF_INDEX_" << (n.bc.sampler_index_mode - V_SQ_CF_INDEX_0);
    526 	}
    527 
    528 	sblog << s.str() << "\n";
    529 }
    530 
    531 void bc_dump::dump_dw(unsigned dw_id, unsigned count) {
    532 	if (!bc_data)
    533 		return;
    534 
    535 	assert(dw_id + count <= ndw);
    536 
    537 	sblog.print_zw(dw_id, 4);
    538 	sblog << "  ";
    539 	while (count--) {
    540 		sblog.print_zw_hex(bc_data[dw_id++], 8);
    541 		sblog << " ";
    542 	}
    543 }
    544 
    545 } // namespace r600_sb
    546