1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #define SB_RA_SCHED_CHECK DEBUG 28 29 #include "os/os_time.h" 30 #include "r600_pipe.h" 31 #include "r600_shader.h" 32 33 #include "sb_public.h" 34 35 #include <stack> 36 #include <map> 37 38 #include "sb_bc.h" 39 #include "sb_shader.h" 40 #include "sb_pass.h" 41 #include "sb_sched.h" 42 43 using namespace r600_sb; 44 45 static sb_hw_class translate_chip_class(enum chip_class cc); 46 static sb_hw_chip translate_chip(enum radeon_family rf); 47 48 sb_context *r600_sb_context_create(struct r600_context *rctx) { 49 50 sb_context *sctx = new sb_context(); 51 52 if (sctx->init(rctx->isa, translate_chip(rctx->b.family), 53 translate_chip_class(rctx->b.chip_class))) { 54 delete sctx; 55 sctx = NULL; 56 } 57 58 unsigned df = rctx->screen->b.debug_flags; 59 60 sb_context::dump_pass = df & DBG_SB_DUMP; 61 sb_context::dump_stat = df & DBG_SB_STAT; 62 sb_context::dry_run = df & DBG_SB_DRY_RUN; 63 sb_context::no_fallback = df & DBG_SB_NO_FALLBACK; 64 sb_context::safe_math = df & DBG_SB_SAFEMATH; 65 66 sb_context::dskip_start = debug_get_num_option("R600_SB_DSKIP_START", 0); 67 sb_context::dskip_end = debug_get_num_option("R600_SB_DSKIP_END", 0); 68 sb_context::dskip_mode = debug_get_num_option("R600_SB_DSKIP_MODE", 0); 69 70 return sctx; 71 } 72 73 void r600_sb_context_destroy(void * sctx) { 74 if (sctx) { 75 sb_context *ctx = static_cast<sb_context*>(sctx); 76 77 if (sb_context::dump_stat) { 78 sblog << "\ncontext src stats: "; 79 ctx->src_stats.dump(); 80 sblog << "context opt stats: "; 81 ctx->opt_stats.dump(); 82 sblog << "context diff: "; 83 ctx->src_stats.dump_diff(ctx->opt_stats); 84 } 85 86 delete ctx; 87 } 88 } 89 90 int r600_sb_bytecode_process(struct r600_context *rctx, 91 struct r600_bytecode *bc, 92 struct r600_shader *pshader, 93 int dump_bytecode, 94 int optimize) { 95 int r = 0; 96 unsigned shader_id = bc->debug_id; 97 98 sb_context *ctx = (sb_context *)rctx->sb_context; 99 if (!ctx) { 100 rctx->sb_context = ctx = r600_sb_context_create(rctx); 101 } 102 103 int64_t time_start = 0; 104 if (sb_context::dump_stat) { 105 time_start = os_time_get_nano(); 106 } 107 108 SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; ); 109 110 bc_parser parser(*ctx, bc, pshader); 111 112 if ((r = parser.decode())) { 113 assert(!"sb: bytecode decoding error"); 114 return r; 115 } 116 117 shader *sh = parser.get_shader(); 118 119 if (dump_bytecode) { 120 bc_dump(*sh, bc->bytecode, bc->ndw).run(); 121 } 122 123 if (!optimize) { 124 delete sh; 125 return 0; 126 } 127 128 if (sh->target != TARGET_FETCH) { 129 sh->src_stats.ndw = bc->ndw; 130 sh->collect_stats(false); 131 } 132 133 /* skip some shaders (use shaders from default backend) 134 * dskip_start - range start, dskip_end - range_end, 135 * e.g. start = 5, end = 6 means shaders 5 & 6 136 * 137 * dskip_mode == 0 - disabled, 138 * dskip_mode == 1 - don't process the shaders from the [start;end] range 139 * dskip_mode == 2 - process only the shaders from the range 140 */ 141 if (sb_context::dskip_mode) { 142 if ((sb_context::dskip_start <= shader_id && 143 shader_id <= sb_context::dskip_end) == 144 (sb_context::dskip_mode == 1)) { 145 sblog << "sb: skipped shader " << shader_id << " : " << "[" 146 << sb_context::dskip_start << "; " 147 << sb_context::dskip_end << "] mode " 148 << sb_context::dskip_mode << "\n"; 149 return 0; 150 } 151 } 152 153 if ((r = parser.prepare())) { 154 assert(!"sb: bytecode parsing error"); 155 return r; 156 } 157 158 SB_DUMP_PASS( sblog << "\n\n###### after parse\n"; sh->dump_ir(); ); 159 160 #define SB_RUN_PASS(n, dump) \ 161 do { \ 162 r = n(*sh).run(); \ 163 if (r) { \ 164 sblog << "sb: error (" << r << ") in the " << #n << " pass.\n"; \ 165 if (sb_context::no_fallback) \ 166 return r; \ 167 sblog << "sb: using unoptimized bytecode...\n"; \ 168 delete sh; \ 169 return 0; \ 170 } \ 171 if (dump) { \ 172 SB_DUMP_PASS( sblog << "\n\n###### after " << #n << "\n"; \ 173 sh->dump_ir();); \ 174 } \ 175 assert(!r); \ 176 } while (0) 177 178 SB_RUN_PASS(ssa_prepare, 0); 179 SB_RUN_PASS(ssa_rename, 1); 180 181 if (sh->has_alu_predication) 182 SB_RUN_PASS(psi_ops, 1); 183 184 SB_RUN_PASS(liveness, 0); 185 186 sh->dce_flags = DF_REMOVE_DEAD | DF_EXPAND; 187 SB_RUN_PASS(dce_cleanup, 0); 188 SB_RUN_PASS(def_use, 0); 189 190 sh->set_undef(sh->root->live_before); 191 192 // if conversion breaks the dependency tracking between CF_EMIT ops when it removes 193 // the phi nodes for SV_GEOMETRY_EMIT. Just disable it for GS 194 if (sh->target != TARGET_GS) 195 SB_RUN_PASS(if_conversion, 1); 196 197 // if_conversion breaks info about uses, but next pass (peephole) 198 // doesn't need it, so we can skip def/use update here 199 // until it's really required 200 //SB_RUN_PASS(def_use, 0); 201 202 SB_RUN_PASS(peephole, 1); 203 SB_RUN_PASS(def_use, 0); 204 205 SB_RUN_PASS(gvn, 1); 206 207 SB_RUN_PASS(def_use, 1); 208 209 sh->dce_flags = DF_REMOVE_DEAD | DF_REMOVE_UNUSED; 210 SB_RUN_PASS(dce_cleanup, 1); 211 212 SB_RUN_PASS(ra_split, 0); 213 SB_RUN_PASS(def_use, 0); 214 215 // create 'basic blocks'. it's not like we build CFG, they are just 216 // container nodes in the correct locations for code placement 217 sh->create_bbs(); 218 219 SB_RUN_PASS(gcm, 1); 220 221 sh->compute_interferences = true; 222 SB_RUN_PASS(liveness, 0); 223 224 sh->dce_flags = DF_REMOVE_DEAD; 225 SB_RUN_PASS(dce_cleanup, 1); 226 227 SB_RUN_PASS(ra_coalesce, 1); 228 SB_RUN_PASS(ra_init, 1); 229 230 SB_RUN_PASS(post_scheduler, 1); 231 232 sh->expand_bbs(); 233 234 #if SB_RA_SCHED_CHECK 235 // check code correctness after regalloc/scheduler 236 SB_RUN_PASS(ra_checker, 0); 237 #endif 238 239 SB_RUN_PASS(bc_finalizer, 0); 240 241 sh->optimized = true; 242 243 bc_builder builder(*sh); 244 245 if ((r = builder.build())) { 246 assert(0); 247 return r; 248 } 249 250 bytecode &nbc = builder.get_bytecode(); 251 252 if (dump_bytecode) { 253 bc_dump(*sh, &nbc).run(); 254 } 255 256 if (!sb_context::dry_run) { 257 258 free(bc->bytecode); 259 bc->ndw = nbc.ndw(); 260 bc->bytecode = (uint32_t*) malloc(bc->ndw << 2); 261 nbc.write_data(bc->bytecode); 262 263 bc->ngpr = sh->ngpr; 264 bc->nstack = sh->nstack; 265 } else { 266 SB_DUMP_STAT( sblog << "sb: dry run: optimized bytecode is not used\n"; ); 267 } 268 269 if (sb_context::dump_stat) { 270 int64_t t = os_time_get_nano() - time_start; 271 272 sblog << "sb: processing shader " << shader_id << " done ( " 273 << ((double)t)/1000000.0 << " ms ).\n"; 274 275 sh->opt_stats.ndw = bc->ndw; 276 sh->collect_stats(true); 277 278 sblog << "src stats: "; 279 sh->src_stats.dump(); 280 sblog << "opt stats: "; 281 sh->opt_stats.dump(); 282 sblog << "diff: "; 283 sh->src_stats.dump_diff(sh->opt_stats); 284 } 285 286 delete sh; 287 return 0; 288 } 289 290 static sb_hw_chip translate_chip(enum radeon_family rf) { 291 switch (rf) { 292 293 #define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c 294 TRANSLATE_CHIP(R600); 295 TRANSLATE_CHIP(RV610); 296 TRANSLATE_CHIP(RV630); 297 TRANSLATE_CHIP(RV670); 298 TRANSLATE_CHIP(RV620); 299 TRANSLATE_CHIP(RV635); 300 TRANSLATE_CHIP(RS780); 301 TRANSLATE_CHIP(RS880); 302 TRANSLATE_CHIP(RV770); 303 TRANSLATE_CHIP(RV730); 304 TRANSLATE_CHIP(RV710); 305 TRANSLATE_CHIP(RV740); 306 TRANSLATE_CHIP(CEDAR); 307 TRANSLATE_CHIP(REDWOOD); 308 TRANSLATE_CHIP(JUNIPER); 309 TRANSLATE_CHIP(CYPRESS); 310 TRANSLATE_CHIP(HEMLOCK); 311 TRANSLATE_CHIP(PALM); 312 TRANSLATE_CHIP(SUMO); 313 TRANSLATE_CHIP(SUMO2); 314 TRANSLATE_CHIP(BARTS); 315 TRANSLATE_CHIP(TURKS); 316 TRANSLATE_CHIP(CAICOS); 317 TRANSLATE_CHIP(CAYMAN); 318 TRANSLATE_CHIP(ARUBA); 319 #undef TRANSLATE_CHIP 320 321 default: 322 assert(!"unknown chip"); 323 return HW_CHIP_UNKNOWN; 324 } 325 } 326 327 static sb_hw_class translate_chip_class(enum chip_class cc) { 328 switch(cc) { 329 case R600: return HW_CLASS_R600; 330 case R700: return HW_CLASS_R700; 331 case EVERGREEN: return HW_CLASS_EVERGREEN; 332 case CAYMAN: return HW_CLASS_CAYMAN; 333 334 default: 335 assert(!"unknown chip class"); 336 return HW_CLASS_UNKNOWN; 337 } 338 } 339