Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #define SB_RA_SCHED_CHECK DEBUG
     28 
     29 #include "os/os_time.h"
     30 #include "r600_pipe.h"
     31 #include "r600_shader.h"
     32 
     33 #include "sb_public.h"
     34 
     35 #include <stack>
     36 #include <map>
     37 
     38 #include "sb_bc.h"
     39 #include "sb_shader.h"
     40 #include "sb_pass.h"
     41 #include "sb_sched.h"
     42 
     43 using namespace r600_sb;
     44 
     45 static sb_hw_class translate_chip_class(enum chip_class cc);
     46 static sb_hw_chip translate_chip(enum radeon_family rf);
     47 
     48 sb_context *r600_sb_context_create(struct r600_context *rctx) {
     49 
     50 	sb_context *sctx = new sb_context();
     51 
     52 	if (sctx->init(rctx->isa, translate_chip(rctx->b.family),
     53 			translate_chip_class(rctx->b.chip_class))) {
     54 		delete sctx;
     55 		sctx = NULL;
     56 	}
     57 
     58 	unsigned df = rctx->screen->b.debug_flags;
     59 
     60 	sb_context::dump_pass = df & DBG_SB_DUMP;
     61 	sb_context::dump_stat = df & DBG_SB_STAT;
     62 	sb_context::dry_run = df & DBG_SB_DRY_RUN;
     63 	sb_context::no_fallback = df & DBG_SB_NO_FALLBACK;
     64 	sb_context::safe_math = df & DBG_SB_SAFEMATH;
     65 
     66 	sb_context::dskip_start = debug_get_num_option("R600_SB_DSKIP_START", 0);
     67 	sb_context::dskip_end = debug_get_num_option("R600_SB_DSKIP_END", 0);
     68 	sb_context::dskip_mode = debug_get_num_option("R600_SB_DSKIP_MODE", 0);
     69 
     70 	return sctx;
     71 }
     72 
     73 void r600_sb_context_destroy(void * sctx) {
     74 	if (sctx) {
     75 		sb_context *ctx = static_cast<sb_context*>(sctx);
     76 
     77 		if (sb_context::dump_stat) {
     78 			sblog << "\ncontext src stats: ";
     79 			ctx->src_stats.dump();
     80 			sblog << "context opt stats: ";
     81 			ctx->opt_stats.dump();
     82 			sblog << "context diff: ";
     83 			ctx->src_stats.dump_diff(ctx->opt_stats);
     84 		}
     85 
     86 		delete ctx;
     87 	}
     88 }
     89 
     90 int r600_sb_bytecode_process(struct r600_context *rctx,
     91                              struct r600_bytecode *bc,
     92                              struct r600_shader *pshader,
     93                              int dump_bytecode,
     94                              int optimize) {
     95 	int r = 0;
     96 	unsigned shader_id = bc->debug_id;
     97 
     98 	sb_context *ctx = (sb_context *)rctx->sb_context;
     99 	if (!ctx) {
    100 		rctx->sb_context = ctx = r600_sb_context_create(rctx);
    101 	}
    102 
    103 	int64_t time_start = 0;
    104 	if (sb_context::dump_stat) {
    105 		time_start = os_time_get_nano();
    106 	}
    107 
    108 	SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; );
    109 
    110 	bc_parser parser(*ctx, bc, pshader);
    111 
    112 	if ((r = parser.decode())) {
    113 		assert(!"sb: bytecode decoding error");
    114 		return r;
    115 	}
    116 
    117 	shader *sh = parser.get_shader();
    118 
    119 	if (dump_bytecode) {
    120 		bc_dump(*sh, bc->bytecode, bc->ndw).run();
    121 	}
    122 
    123 	if (!optimize) {
    124 		delete sh;
    125 		return 0;
    126 	}
    127 
    128 	if (sh->target != TARGET_FETCH) {
    129 		sh->src_stats.ndw = bc->ndw;
    130 		sh->collect_stats(false);
    131 	}
    132 
    133 	/* skip some shaders (use shaders from default backend)
    134 	 * dskip_start - range start, dskip_end - range_end,
    135 	 * e.g. start = 5, end = 6 means shaders 5 & 6
    136 	 *
    137 	 * dskip_mode == 0 - disabled,
    138 	 * dskip_mode == 1 - don't process the shaders from the [start;end] range
    139 	 * dskip_mode == 2 - process only the shaders from the range
    140 	 */
    141 	if (sb_context::dskip_mode) {
    142 		if ((sb_context::dskip_start <= shader_id &&
    143 				shader_id <= sb_context::dskip_end) ==
    144 						(sb_context::dskip_mode == 1)) {
    145 			sblog << "sb: skipped shader " << shader_id << " : " << "["
    146 					<< sb_context::dskip_start << "; "
    147 					<< sb_context::dskip_end << "] mode "
    148 					<< sb_context::dskip_mode << "\n";
    149 			return 0;
    150 		}
    151 	}
    152 
    153 	if ((r = parser.prepare())) {
    154 		assert(!"sb: bytecode parsing error");
    155 		return r;
    156 	}
    157 
    158 	SB_DUMP_PASS( sblog << "\n\n###### after parse\n"; sh->dump_ir(); );
    159 
    160 #define SB_RUN_PASS(n, dump) \
    161 	do { \
    162 		r = n(*sh).run(); \
    163 		if (r) { \
    164 			sblog << "sb: error (" << r << ") in the " << #n << " pass.\n"; \
    165 			if (sb_context::no_fallback) \
    166 				return r; \
    167 			sblog << "sb: using unoptimized bytecode...\n"; \
    168 			delete sh; \
    169 			return 0; \
    170 		} \
    171 		if (dump) { \
    172 			SB_DUMP_PASS( sblog << "\n\n###### after " << #n << "\n"; \
    173 				sh->dump_ir();); \
    174 		} \
    175 		assert(!r); \
    176 	} while (0)
    177 
    178 	SB_RUN_PASS(ssa_prepare,		0);
    179 	SB_RUN_PASS(ssa_rename,			1);
    180 
    181 	if (sh->has_alu_predication)
    182 		SB_RUN_PASS(psi_ops,		1);
    183 
    184 	SB_RUN_PASS(liveness,			0);
    185 
    186 	sh->dce_flags = DF_REMOVE_DEAD | DF_EXPAND;
    187 	SB_RUN_PASS(dce_cleanup,		0);
    188 	SB_RUN_PASS(def_use,			0);
    189 
    190 	sh->set_undef(sh->root->live_before);
    191 
    192 	// if conversion breaks the dependency tracking between CF_EMIT ops when it removes
    193 	// the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS
    194 	if (sh->target != TARGET_GS)
    195 		SB_RUN_PASS(if_conversion,		1);
    196 
    197 	// if_conversion breaks info about uses, but next pass (peephole)
    198 	// doesn't need it, so we can skip def/use update here
    199 	// until it's really required
    200 	//SB_RUN_PASS(def_use,			0);
    201 
    202 	SB_RUN_PASS(peephole,			1);
    203 	SB_RUN_PASS(def_use,			0);
    204 
    205 	SB_RUN_PASS(gvn,				1);
    206 
    207 	SB_RUN_PASS(def_use,			1);
    208 
    209 	sh->dce_flags = DF_REMOVE_DEAD | DF_REMOVE_UNUSED;
    210 	SB_RUN_PASS(dce_cleanup,		1);
    211 
    212 	SB_RUN_PASS(ra_split,			0);
    213 	SB_RUN_PASS(def_use,			0);
    214 
    215 	// create 'basic blocks'. it's not like we build CFG, they are just
    216 	// container nodes in the correct locations for code placement
    217 	sh->create_bbs();
    218 
    219 	SB_RUN_PASS(gcm,				1);
    220 
    221 	sh->compute_interferences = true;
    222 	SB_RUN_PASS(liveness,			0);
    223 
    224 	sh->dce_flags = DF_REMOVE_DEAD;
    225 	SB_RUN_PASS(dce_cleanup,		1);
    226 
    227 	SB_RUN_PASS(ra_coalesce,		1);
    228 	SB_RUN_PASS(ra_init,			1);
    229 
    230 	SB_RUN_PASS(post_scheduler,		1);
    231 
    232 	sh->expand_bbs();
    233 
    234 #if SB_RA_SCHED_CHECK
    235 	// check code correctness after regalloc/scheduler
    236 	SB_RUN_PASS(ra_checker,			0);
    237 #endif
    238 
    239 	SB_RUN_PASS(bc_finalizer,		0);
    240 
    241 	sh->optimized = true;
    242 
    243 	bc_builder builder(*sh);
    244 
    245 	if ((r = builder.build())) {
    246 		assert(0);
    247 		return r;
    248 	}
    249 
    250 	bytecode &nbc = builder.get_bytecode();
    251 
    252 	if (dump_bytecode) {
    253 		bc_dump(*sh, &nbc).run();
    254 	}
    255 
    256 	if (!sb_context::dry_run) {
    257 
    258 		free(bc->bytecode);
    259 		bc->ndw = nbc.ndw();
    260 		bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
    261 		nbc.write_data(bc->bytecode);
    262 
    263 		bc->ngpr = sh->ngpr;
    264 		bc->nstack = sh->nstack;
    265 	} else {
    266 		SB_DUMP_STAT( sblog << "sb: dry run: optimized bytecode is not used\n"; );
    267 	}
    268 
    269 	if (sb_context::dump_stat) {
    270 		int64_t t = os_time_get_nano() - time_start;
    271 
    272 		sblog << "sb: processing shader " << shader_id << " done ( "
    273 				<< ((double)t)/1000000.0 << " ms ).\n";
    274 
    275 		sh->opt_stats.ndw = bc->ndw;
    276 		sh->collect_stats(true);
    277 
    278 		sblog << "src stats: ";
    279 		sh->src_stats.dump();
    280 		sblog << "opt stats: ";
    281 		sh->opt_stats.dump();
    282 		sblog << "diff: ";
    283 		sh->src_stats.dump_diff(sh->opt_stats);
    284 	}
    285 
    286 	delete sh;
    287 	return 0;
    288 }
    289 
    290 static sb_hw_chip translate_chip(enum radeon_family rf) {
    291 	switch (rf) {
    292 
    293 #define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c
    294 		TRANSLATE_CHIP(R600);
    295 		TRANSLATE_CHIP(RV610);
    296 		TRANSLATE_CHIP(RV630);
    297 		TRANSLATE_CHIP(RV670);
    298 		TRANSLATE_CHIP(RV620);
    299 		TRANSLATE_CHIP(RV635);
    300 		TRANSLATE_CHIP(RS780);
    301 		TRANSLATE_CHIP(RS880);
    302 		TRANSLATE_CHIP(RV770);
    303 		TRANSLATE_CHIP(RV730);
    304 		TRANSLATE_CHIP(RV710);
    305 		TRANSLATE_CHIP(RV740);
    306 		TRANSLATE_CHIP(CEDAR);
    307 		TRANSLATE_CHIP(REDWOOD);
    308 		TRANSLATE_CHIP(JUNIPER);
    309 		TRANSLATE_CHIP(CYPRESS);
    310 		TRANSLATE_CHIP(HEMLOCK);
    311 		TRANSLATE_CHIP(PALM);
    312 		TRANSLATE_CHIP(SUMO);
    313 		TRANSLATE_CHIP(SUMO2);
    314 		TRANSLATE_CHIP(BARTS);
    315 		TRANSLATE_CHIP(TURKS);
    316 		TRANSLATE_CHIP(CAICOS);
    317 		TRANSLATE_CHIP(CAYMAN);
    318 		TRANSLATE_CHIP(ARUBA);
    319 #undef TRANSLATE_CHIP
    320 
    321 		default:
    322 			assert(!"unknown chip");
    323 			return HW_CHIP_UNKNOWN;
    324 	}
    325 }
    326 
    327 static sb_hw_class translate_chip_class(enum chip_class cc) {
    328 	switch(cc) {
    329 		case R600: return HW_CLASS_R600;
    330 		case R700: return HW_CLASS_R700;
    331 		case EVERGREEN: return HW_CLASS_EVERGREEN;
    332 		case CAYMAN: return HW_CLASS_CAYMAN;
    333 
    334 		default:
    335 			assert(!"unknown chip class");
    336 			return HW_CLASS_UNKNOWN;
    337 	}
    338 }
    339