Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #ifndef SB_PASS_H_
     28 #define SB_PASS_H_
     29 
     30 #include <stack>
     31 
     32 namespace r600_sb {
     33 
     34 class pass {
     35 protected:
     36 	sb_context &ctx;
     37 	shader &sh;
     38 
     39 public:
     40 	pass(shader &s);
     41 
     42 	virtual int run();
     43 
     44 	virtual ~pass() {}
     45 };
     46 
     47 class vpass : public pass {
     48 
     49 public:
     50 
     51 	vpass(shader &s) : pass(s) {}
     52 
     53 	virtual int init();
     54 	virtual int done();
     55 
     56 	virtual int run();
     57 	virtual void run_on(container_node &n);
     58 
     59 	virtual bool visit(node &n, bool enter);
     60 	virtual bool visit(container_node &n, bool enter);
     61 	virtual bool visit(alu_group_node &n, bool enter);
     62 	virtual bool visit(cf_node &n, bool enter);
     63 	virtual bool visit(alu_node &n, bool enter);
     64 	virtual bool visit(alu_packed_node &n, bool enter);
     65 	virtual bool visit(fetch_node &n, bool enter);
     66 	virtual bool visit(region_node &n, bool enter);
     67 	virtual bool visit(repeat_node &n, bool enter);
     68 	virtual bool visit(depart_node &n, bool enter);
     69 	virtual bool visit(if_node &n, bool enter);
     70 	virtual bool visit(bb_node &n, bool enter);
     71 
     72 };
     73 
     74 class rev_vpass : public vpass {
     75 
     76 public:
     77 	rev_vpass(shader &s) : vpass(s) {}
     78 
     79 	virtual void run_on(container_node &n);
     80 };
     81 
     82 
     83 // =================== PASSES
     84 
     85 class bytecode;
     86 
     87 class bc_dump : public vpass {
     88 	using vpass::visit;
     89 
     90 	uint32_t *bc_data;
     91 	unsigned ndw;
     92 
     93 	unsigned id;
     94 
     95 	unsigned new_group, group_index;
     96 
     97 public:
     98 
     99 	bc_dump(shader &s, bytecode *bc = NULL);
    100 
    101 	bc_dump(shader &s, uint32_t *bc_ptr, unsigned ndw) :
    102 		vpass(s), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {}
    103 
    104 	virtual int init();
    105 	virtual int done();
    106 
    107 	virtual bool visit(cf_node &n, bool enter);
    108 	virtual bool visit(alu_node &n, bool enter);
    109 	virtual bool visit(fetch_node &n, bool enter);
    110 
    111 	void dump_dw(unsigned dw_id, unsigned count = 2);
    112 
    113 	void dump(cf_node& n);
    114 	void dump(alu_node& n);
    115 	void dump(fetch_node& n);
    116 };
    117 
    118 
    119 class dce_cleanup : public vpass {
    120 	using vpass::visit;
    121 
    122 	bool remove_unused;
    123 
    124 public:
    125 
    126 	dce_cleanup(shader &s) : vpass(s),
    127 		remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {}
    128 
    129 	virtual int run();
    130 
    131 	virtual bool visit(node &n, bool enter);
    132 	virtual bool visit(alu_group_node &n, bool enter);
    133 	virtual bool visit(cf_node &n, bool enter);
    134 	virtual bool visit(alu_node &n, bool enter);
    135 	virtual bool visit(alu_packed_node &n, bool enter);
    136 	virtual bool visit(fetch_node &n, bool enter);
    137 	virtual bool visit(region_node &n, bool enter);
    138 	virtual bool visit(container_node &n, bool enter);
    139 
    140 private:
    141 
    142 	void cleanup_dst(node &n);
    143 	bool cleanup_dst_vec(vvec &vv);
    144 
    145 	// Did we alter/remove nodes during a single pass?
    146 	bool nodes_changed;
    147 };
    148 
    149 
    150 class def_use : public pass {
    151 
    152 public:
    153 
    154 	def_use(shader &sh) : pass(sh) {}
    155 
    156 	virtual int run();
    157 	void run_on(node *n, bool defs);
    158 
    159 private:
    160 
    161 	void process_uses(node *n);
    162 	void process_defs(node *n, vvec &vv, bool arr_def);
    163 	void process_phi(container_node *c, bool defs, bool uses);
    164 };
    165 
    166 
    167 
    168 class dump : public vpass {
    169 	using vpass::visit;
    170 
    171 	int level;
    172 
    173 public:
    174 
    175 	dump(shader &s) : vpass(s), level(0) {}
    176 
    177 	virtual bool visit(node &n, bool enter);
    178 	virtual bool visit(container_node &n, bool enter);
    179 	virtual bool visit(alu_group_node &n, bool enter);
    180 	virtual bool visit(cf_node &n, bool enter);
    181 	virtual bool visit(alu_node &n, bool enter);
    182 	virtual bool visit(alu_packed_node &n, bool enter);
    183 	virtual bool visit(fetch_node &n, bool enter);
    184 	virtual bool visit(region_node &n, bool enter);
    185 	virtual bool visit(repeat_node &n, bool enter);
    186 	virtual bool visit(depart_node &n, bool enter);
    187 	virtual bool visit(if_node &n, bool enter);
    188 	virtual bool visit(bb_node &n, bool enter);
    189 
    190 
    191 	static void dump_op(node &n, const char *name);
    192 	static void dump_vec(const vvec & vv);
    193 	static void dump_set(shader &sh, val_set & v);
    194 
    195 	static void dump_rels(vvec & vv);
    196 
    197 	static void dump_val(value *v);
    198 	static void dump_op(node *n);
    199 
    200 	static void dump_op_list(container_node *c);
    201 	static void dump_queue(sched_queue &q);
    202 
    203 	static void dump_alu(alu_node *n);
    204 
    205 private:
    206 
    207 	void indent();
    208 
    209 	void dump_common(node &n);
    210 	void dump_flags(node &n);
    211 
    212 	void dump_live_values(container_node &n, bool before);
    213 };
    214 
    215 
    216 // Global Code Motion
    217 
    218 class gcm : public pass {
    219 
    220 	sched_queue bu_ready[SQ_NUM];
    221 	sched_queue bu_ready_next[SQ_NUM];
    222 	sched_queue bu_ready_early[SQ_NUM];
    223 	sched_queue ready;
    224 	sched_queue ready_above;
    225 
    226 	container_node pending;
    227 
    228 	struct op_info {
    229 		bb_node* top_bb;
    230 		bb_node* bottom_bb;
    231 		op_info() : top_bb(), bottom_bb() {}
    232 	};
    233 
    234 	typedef std::map<node*, op_info> op_info_map;
    235 
    236 	typedef std::map<node*, unsigned> nuc_map;
    237 
    238 	op_info_map op_map;
    239 	nuc_map uses;
    240 
    241 	typedef std::vector<nuc_map> nuc_stack;
    242 
    243 	nuc_stack nuc_stk;
    244 	unsigned ucs_level;
    245 
    246 	bb_node * bu_bb;
    247 
    248 	vvec pending_defs;
    249 
    250 	node_list pending_nodes;
    251 
    252 	unsigned cur_sq;
    253 
    254 	// for register pressure tracking in bottom-up pass
    255 	val_set live;
    256 	int live_count;
    257 
    258 	static const int rp_threshold = 100;
    259 
    260 	bool pending_exec_mask_update;
    261 
    262 public:
    263 
    264 	gcm(shader &sh) : pass(sh),
    265 		bu_ready(), bu_ready_next(), bu_ready_early(),
    266 		ready(), op_map(), uses(), nuc_stk(1), ucs_level(),
    267 		bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
    268 		live(), live_count(), pending_exec_mask_update() {}
    269 
    270 	virtual int run();
    271 
    272 private:
    273 
    274 	void collect_instructions(container_node *c, bool early_pass);
    275 
    276 	void sched_early(container_node *n);
    277 	void td_sched_bb(bb_node *bb);
    278 	bool td_is_ready(node *n);
    279 	void td_release_uses(vvec &v);
    280 	void td_release_val(value *v);
    281 	void td_schedule(bb_node *bb, node *n);
    282 
    283 	void sched_late(container_node *n);
    284 	void bu_sched_bb(bb_node *bb);
    285 	void bu_release_defs(vvec &v, bool src);
    286 	void bu_release_phi_defs(container_node *p, unsigned op);
    287 	bool bu_is_ready(node *n);
    288 	void bu_release_val(value *v);
    289 	void bu_release_op(node * n);
    290 	void bu_find_best_bb(node *n, op_info &oi);
    291 	void bu_schedule(container_node *bb, node *n);
    292 
    293 	void push_uc_stack();
    294 	void pop_uc_stack();
    295 
    296 	void init_def_count(nuc_map &m, container_node &s);
    297 	void init_use_count(nuc_map &m, container_node &s);
    298 	unsigned get_uc_vec(vvec &vv);
    299 	unsigned get_dc_vec(vvec &vv, bool src);
    300 
    301 	void add_ready(node *n);
    302 
    303 	void dump_uc_stack();
    304 
    305 	unsigned real_alu_count(sched_queue &q, unsigned max);
    306 
    307 	// check if we have not less than threshold ready alu instructions
    308 	bool check_alu_ready_count(unsigned threshold);
    309 };
    310 
    311 
    312 class gvn : public vpass {
    313 	using vpass::visit;
    314 
    315 public:
    316 
    317 	gvn(shader &sh) : vpass(sh) {}
    318 
    319 	virtual bool visit(node &n, bool enter);
    320 	virtual bool visit(cf_node &n, bool enter);
    321 	virtual bool visit(alu_node &n, bool enter);
    322 	virtual bool visit(alu_packed_node &n, bool enter);
    323 	virtual bool visit(fetch_node &n, bool enter);
    324 	virtual bool visit(region_node &n, bool enter);
    325 
    326 private:
    327 
    328 	void process_op(node &n, bool rewrite = true);
    329 
    330 	// returns true if the value was rewritten
    331 	bool process_src(value* &v, bool rewrite);
    332 
    333 
    334 	void process_alu_src_constants(node &n, value* &v);
    335 };
    336 
    337 
    338 class if_conversion : public pass {
    339 
    340 public:
    341 
    342 	if_conversion(shader &sh) : pass(sh) {}
    343 
    344 	virtual int run();
    345 
    346 	bool run_on(region_node *r);
    347 
    348 	void convert_kill_instructions(region_node *r, value *em, bool branch,
    349 	                               container_node *c);
    350 
    351 	bool check_and_convert(region_node *r);
    352 
    353 	alu_node* convert_phi(value *select, node *phi);
    354 
    355 };
    356 
    357 
    358 class liveness : public rev_vpass {
    359 	using vpass::visit;
    360 
    361 	val_set live;
    362 	bool live_changed;
    363 
    364 public:
    365 
    366 	liveness(shader &s) : rev_vpass(s), live_changed(false) {}
    367 
    368 	virtual int init();
    369 
    370 	virtual bool visit(node &n, bool enter);
    371 	virtual bool visit(bb_node &n, bool enter);
    372 	virtual bool visit(container_node &n, bool enter);
    373 	virtual bool visit(alu_group_node &n, bool enter);
    374 	virtual bool visit(cf_node &n, bool enter);
    375 	virtual bool visit(alu_node &n, bool enter);
    376 	virtual bool visit(alu_packed_node &n, bool enter);
    377 	virtual bool visit(fetch_node &n, bool enter);
    378 	virtual bool visit(region_node &n, bool enter);
    379 	virtual bool visit(repeat_node &n, bool enter);
    380 	virtual bool visit(depart_node &n, bool enter);
    381 	virtual bool visit(if_node &n, bool enter);
    382 
    383 private:
    384 
    385 	void update_interferences();
    386 	void process_op(node &n);
    387 
    388 	bool remove_val(value *v);
    389 	bool remove_vec(vvec &v);
    390 	bool process_outs(node& n);
    391 	void process_ins(node& n);
    392 
    393 	void process_phi_outs(container_node *phi);
    394 	void process_phi_branch(container_node *phi, unsigned id);
    395 
    396 	bool process_maydef(value *v);
    397 
    398 	bool add_vec(vvec &vv, bool src);
    399 
    400 	void update_src_vec(vvec &vv, bool src);
    401 };
    402 
    403 
    404 struct bool_op_info {
    405 	bool invert;
    406 	unsigned int_cvt;
    407 
    408 	alu_node *n;
    409 };
    410 
    411 class peephole : public pass {
    412 
    413 public:
    414 
    415 	peephole(shader &sh) : pass(sh) {}
    416 
    417 	virtual int run();
    418 
    419 	void run_on(container_node *c);
    420 
    421 	void optimize_cc_op(alu_node *a);
    422 
    423 	void optimize_cc_op2(alu_node *a);
    424 	void optimize_CNDcc_op(alu_node *a);
    425 
    426 	bool get_bool_op_info(value *b, bool_op_info& bop);
    427 	bool get_bool_flt_to_int_source(alu_node* &a);
    428 	void convert_float_setcc(alu_node *f2i, alu_node *s);
    429 };
    430 
    431 
    432 class psi_ops : public rev_vpass {
    433 	using rev_vpass::visit;
    434 
    435 public:
    436 
    437 	psi_ops(shader &s) : rev_vpass(s) {}
    438 
    439 	virtual bool visit(node &n, bool enter);
    440 	virtual bool visit(alu_node &n, bool enter);
    441 
    442 	bool try_inline(node &n);
    443 	bool try_reduce(node &n);
    444 	bool eliminate(node &n);
    445 
    446 	void unpredicate(node *n);
    447 };
    448 
    449 
    450 // check correctness of the generated code, e.g.:
    451 // - expected source operand value is the last value written to its gpr,
    452 // - all arguments of phi node should be allocated to the same gpr,
    453 // TODO other tests
    454 class ra_checker : public pass {
    455 
    456 	typedef std::map<sel_chan, value *> reg_value_map;
    457 
    458 	typedef std::vector<reg_value_map> regmap_stack;
    459 
    460 	regmap_stack rm_stack;
    461 	unsigned rm_stk_level;
    462 
    463 	value* prev_dst[5];
    464 
    465 public:
    466 
    467 	ra_checker(shader &sh) : pass(sh), rm_stk_level(0), prev_dst() {}
    468 
    469 	virtual int run();
    470 
    471 	void run_on(container_node *c);
    472 
    473 	void dump_error(const error_info &e);
    474 	void dump_all_errors();
    475 
    476 private:
    477 
    478 	reg_value_map& rmap() { return rm_stack[rm_stk_level]; }
    479 
    480 	void push_stack();
    481 	void pop_stack();
    482 
    483 	// when going out of the alu clause, values in the clause temporary gprs,
    484 	// AR, predicate values, PS/PV are destroyed
    485 	void kill_alu_only_regs();
    486 	void error(node *n, unsigned id, std::string msg);
    487 
    488 	void check_phi_src(container_node *p, unsigned id);
    489 	void process_phi_dst(container_node *p);
    490 	void check_alu_group(alu_group_node *g);
    491 	void process_op_dst(node *n);
    492 	void check_op_src(node *n);
    493 	void check_src_vec(node *n, unsigned id, vvec &vv, bool src);
    494 	void check_value_gpr(node *n, unsigned id, value *v);
    495 };
    496 
    497 // =======================================
    498 
    499 
    500 class ra_coalesce : public pass {
    501 
    502 public:
    503 
    504 	ra_coalesce(shader &sh) : pass(sh) {}
    505 
    506 	virtual int run();
    507 };
    508 
    509 
    510 
    511 // =======================================
    512 
    513 class ra_init : public pass {
    514 
    515 public:
    516 
    517 	ra_init(shader &sh) : pass(sh), prev_chans() {
    518 
    519 		// The parameter below affects register channels distribution.
    520 		// For cayman (VLIW-4) we're trying to distribute the channels
    521 		// uniformly, this means significantly better alu slots utilization
    522 		// at the expense of higher gpr usage. Hopefully this will improve
    523 		// performance, though it has to be proven with real benchmarks yet.
    524 		// For VLIW-5 this method could also slightly improve slots
    525 		// utilization, but increased register pressure seems more significant
    526 		// and overall performance effect is negative according to some
    527 		// benchmarks, so it's not used currently. Basically, VLIW-5 doesn't
    528 		// really need it because trans slot (unrestricted by register write
    529 		// channel) allows to consume most deviations from uniform channel
    530 		// distribution.
    531 		// Value 3 means that for new allocation we'll use channel that differs
    532 		// from 3 last used channels. 0 for VLIW-5 effectively turns this off.
    533 
    534 		ra_tune = sh.get_ctx().is_cayman() ? 3 : 0;
    535 	}
    536 
    537 	virtual int run();
    538 
    539 private:
    540 
    541 	unsigned prev_chans;
    542 	unsigned ra_tune;
    543 
    544 	void add_prev_chan(unsigned chan);
    545 	unsigned get_preferable_chan_mask();
    546 
    547 	void ra_node(container_node *c);
    548 	void process_op(node *n);
    549 
    550 	void color(value *v);
    551 
    552 	void color_bs_constraint(ra_constraint *c);
    553 
    554 	void assign_color(value *v, sel_chan c);
    555 	void alloc_arrays();
    556 };
    557 
    558 // =======================================
    559 
    560 class ra_split : public pass {
    561 
    562 public:
    563 
    564 	ra_split(shader &sh) : pass(sh) {}
    565 
    566 	virtual int run();
    567 
    568 	void split(container_node *n);
    569 	void split_op(node *n);
    570 	void split_alu_packed(alu_packed_node *n);
    571 	void split_vector_inst(node *n);
    572 
    573 	void split_packed_ins(alu_packed_node *n);
    574 
    575 #if 0
    576 	void split_pinned_outs(node *n);
    577 #endif
    578 
    579 	void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz);
    580 
    581 	void split_phi_src(container_node *loc, container_node *c, unsigned id,
    582 	                   bool loop);
    583 	void split_phi_dst(node *loc, container_node *c, bool loop);
    584 	void init_phi_constraints(container_node *c);
    585 };
    586 
    587 
    588 
    589 class ssa_prepare : public vpass {
    590 	using vpass::visit;
    591 
    592 	typedef std::vector<val_set> vd_stk;
    593 	vd_stk stk;
    594 
    595 	unsigned level;
    596 
    597 public:
    598 	ssa_prepare(shader &s) : vpass(s), level(0) {}
    599 
    600 	virtual bool visit(cf_node &n, bool enter);
    601 	virtual bool visit(alu_node &n, bool enter);
    602 	virtual bool visit(fetch_node &n, bool enter);
    603 	virtual bool visit(region_node &n, bool enter);
    604 	virtual bool visit(repeat_node &n, bool enter);
    605 	virtual bool visit(depart_node &n, bool enter);
    606 
    607 private:
    608 
    609 	void push_stk() {
    610 		++level;
    611 		if (level + 1 > stk.size())
    612 			stk.resize(level+1);
    613 		else
    614 			stk[level].clear();
    615 	}
    616 	void pop_stk() {
    617 		assert(level);
    618 		--level;
    619 		stk[level].add_set(stk[level + 1]);
    620 	}
    621 
    622 	void add_defs(node &n);
    623 
    624 	val_set & cur_set() { return stk[level]; }
    625 
    626 	container_node* create_phi_nodes(int count);
    627 };
    628 
    629 class ssa_rename : public vpass {
    630 	using vpass::visit;
    631 
    632 	typedef sb_map<value*, unsigned> def_map;
    633 
    634 	def_map def_count;
    635 	std::stack<def_map> rename_stack;
    636 
    637 	typedef std::map<uint32_t, value*> val_map;
    638 	val_map values;
    639 
    640 public:
    641 
    642 	ssa_rename(shader &s) : vpass(s) {}
    643 
    644 	virtual int init();
    645 
    646 	virtual bool visit(container_node &n, bool enter);
    647 	virtual bool visit(node &n, bool enter);
    648 	virtual bool visit(alu_group_node &n, bool enter);
    649 	virtual bool visit(cf_node &n, bool enter);
    650 	virtual bool visit(alu_node &n, bool enter);
    651 	virtual bool visit(alu_packed_node &n, bool enter);
    652 	virtual bool visit(fetch_node &n, bool enter);
    653 	virtual bool visit(region_node &n, bool enter);
    654 	virtual bool visit(repeat_node &n, bool enter);
    655 	virtual bool visit(depart_node &n, bool enter);
    656 	virtual bool visit(if_node &n, bool enter);
    657 
    658 private:
    659 
    660 	void push(node *phi);
    661 	void pop();
    662 
    663 	unsigned get_index(def_map& m, value* v);
    664 	void set_index(def_map& m, value* v, unsigned index);
    665 	unsigned new_index(def_map& m, value* v);
    666 
    667 	value* rename_use(node *n, value* v);
    668 	value* rename_def(node *def, value* v);
    669 
    670 	void rename_src_vec(node *n, vvec &vv, bool src);
    671 	void rename_dst_vec(node *def, vvec &vv, bool set_def);
    672 
    673 	void rename_src(node *n);
    674 	void rename_dst(node *n);
    675 
    676 	void rename_phi_args(container_node *phi, unsigned op, bool def);
    677 
    678 	void rename_virt(node *n);
    679 	void rename_virt_val(node *n, value *v);
    680 };
    681 
    682 class bc_finalizer : public pass {
    683 
    684 	cf_node *last_export[EXP_TYPE_COUNT];
    685 	cf_node *last_cf;
    686 
    687 	unsigned ngpr;
    688 	unsigned nstack;
    689 
    690 public:
    691 
    692 	bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(),
    693 		nstack() {}
    694 
    695 	virtual int run();
    696 
    697 	void finalize_loop(region_node *r);
    698 	void finalize_if(region_node *r);
    699 
    700 	void run_on(container_node *c);
    701 
    702 	void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
    703 	void finalize_alu_group(alu_group_node *g, node *prev_node);
    704 	bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node);
    705 
    706 	void emit_set_grad(fetch_node* f);
    707 	void finalize_fetch(fetch_node *f);
    708 
    709 	void finalize_cf(cf_node *c);
    710 
    711 	sel_chan translate_kcache(cf_node *alu, value *v);
    712 
    713 	void update_ngpr(unsigned gpr);
    714 	void update_nstack(region_node *r, unsigned add = 0);
    715 
    716 	unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs,
    717 	                         unsigned add = 0);
    718 
    719 	void cf_peephole();
    720 
    721 private:
    722 	void copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start);
    723 	void emit_set_texture_offsets(fetch_node &f);
    724 };
    725 
    726 
    727 } // namespace r600_sb
    728 
    729 #endif /* SB_PASS_H_ */
    730