Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #ifndef SB_SCHED_H_
     28 #define SB_SCHED_H_
     29 
     30 namespace r600_sb {
     31 
// Map from a node to an unsigned counter (presumably the "use count" of the
// legend below). post_scheduler keeps one instance of it as `ucm`.
typedef sb_map<node*, unsigned> uc_map;

// Resource trackers for the scheduler.
// Abbreviations used in the member names below:
//   rp = read port
//   uc = use count

// Set of unsigned values, passed by reference to
// rp_kcache_tracker::get_lines() (presumably kcache line numbers).
typedef sb_set<unsigned> kc_lines;
     39 
     40 class rp_kcache_tracker {
     41 	unsigned rp[4];
     42 	unsigned uc[4];
     43 	const unsigned sel_count;
     44 
     45 	unsigned kc_sel(sel_chan r) {
     46 		return sel_count == 4 ? (unsigned)r : ((r - 1) >> 1) + 1;
     47 	}
     48 
     49 public:
     50 	rp_kcache_tracker(shader &sh);
     51 
     52 	bool try_reserve(node *n);
     53 	void unreserve(node *n);
     54 
     55 
     56 	bool try_reserve(sel_chan r);
     57 	void unreserve(sel_chan r);
     58 
     59 	void reset();
     60 
     61 	unsigned num_sels() { return !!rp[0] + !!rp[1] + !!rp[2] + !!rp[3]; }
     62 
     63 	unsigned get_lines(kc_lines &lines);
     64 };
     65 
     66 class literal_tracker {
     67 	literal lt[4];
     68 	unsigned uc[4];
     69 
     70 public:
     71 	literal_tracker() : lt(), uc() {}
     72 
     73 	bool try_reserve(alu_node *n);
     74 	void unreserve(alu_node *n);
     75 
     76 	bool try_reserve(literal l);
     77 	void unreserve(literal l);
     78 
     79 	void reset();
     80 
     81 	unsigned count() { return !!uc[0] + !!uc[1] + !!uc[2] + !!uc[3]; }
     82 
     83 	void init_group_literals(alu_group_node *g);
     84 
     85 };
     86 
     87 class rp_gpr_tracker {
     88 	// rp[cycle][elem]
     89 	unsigned rp[3][4];
     90 	unsigned uc[3][4];
     91 
     92 public:
     93 	rp_gpr_tracker() : rp(), uc() {}
     94 
     95 	bool try_reserve(alu_node *n);
     96 	void unreserve(alu_node *n);
     97 
     98 	bool try_reserve(unsigned cycle, unsigned sel, unsigned chan);
     99 	void unreserve(unsigned cycle, unsigned sel, unsigned chan);
    100 
    101 	void reset();
    102 
    103 	void dump();
    104 };
    105 
    106 class alu_group_tracker {
    107 
    108 	shader &sh;
    109 
    110 	rp_kcache_tracker kc;
    111 	rp_gpr_tracker gpr;
    112 	literal_tracker lt;
    113 
    114 	alu_node * slots[5];
    115 
    116 	unsigned available_slots;
    117 
    118 	unsigned max_slots;
    119 
    120 	typedef std::map<value*, unsigned> value_index_map;
    121 
    122 	value_index_map vmap;
    123 
    124 	bool has_mova;
    125 	bool uses_ar;
    126 	bool has_predset;
    127 	bool has_kill;
    128 	bool updates_exec_mask;
    129 
    130 	unsigned chan_count[4];
    131 
    132 	// param index + 1 (0 means that group doesn't refer to Params)
    133 	// we can't use more than one param index in a group
    134 	unsigned interp_param;
    135 
    136 	unsigned next_id;
    137 
    138 	node_vec packed_ops;
    139 
    140 	void assign_slot(unsigned slot, alu_node *n);
    141 
    142 public:
    143 	alu_group_tracker(shader &sh);
    144 
    145 	// FIXME use fast bs correctness check (values for same chan <= 3) ??
    146 	bool try_reserve(alu_node *n);
    147 	bool try_reserve(alu_packed_node *p);
    148 
    149 	void reinit();
    150 	void reset(bool keep_packed = false);
    151 
    152 	sel_chan get_value_id(value *v);
    153 	void update_flags(alu_node *n);
    154 
    155 	alu_node* slot(unsigned i) { return slots[i]; }
    156 
    157 	unsigned used_slots() {
    158 		return (~available_slots) & ((1 << max_slots) - 1);
    159 	}
    160 
    161 	unsigned inst_count() {
    162 		return __builtin_popcount(used_slots());
    163 	}
    164 
    165 	unsigned literal_count() { return lt.count(); }
    166 	unsigned literal_slot_count() { return (literal_count() + 1) >> 1; };
    167 	unsigned slot_count() { return inst_count() + literal_slot_count(); }
    168 
    169 	alu_group_node* emit();
    170 
    171 	rp_kcache_tracker& kcache() { return kc; }
    172 
    173 	bool has_update_exec_mask() { return updates_exec_mask; }
    174 	unsigned avail_slots() { return available_slots; }
    175 
    176 	void discard_all_slots(container_node &removed_nodes);
    177 	void discard_slots(unsigned slot_mask, container_node &removed_nodes);
    178 
    179 	bool has_ar_load() { return has_mova; }
    180 };
    181 
    182 class alu_kcache_tracker {
    183 	bc_kcache kc[4];
    184 	sb_set<unsigned> lines;
    185 	unsigned max_kcs;
    186 
    187 public:
    188 
    189 	alu_kcache_tracker(sb_hw_class hc)
    190 		: kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 4 : 2) {}
    191 
    192 	void reset();
    193 	bool try_reserve(alu_group_tracker &gt);
    194 	bool update_kc();
    195 	void init_clause(bc_cf &bc) {
    196 		memcpy(bc.kc, kc, sizeof(kc));
    197 	}
    198 };
    199 
    200 class alu_clause_tracker {
    201 	shader &sh;
    202 
    203 	alu_kcache_tracker kt;
    204 	unsigned slot_count;
    205 
    206 	alu_group_tracker grp0;
    207 	alu_group_tracker grp1;
    208 
    209 	unsigned group;
    210 
    211 	cf_node *clause;
    212 
    213 	bool push_exec_mask;
    214 
    215 public:
    216 	container_node conflict_nodes;
    217 
    218 	// current values of AR and PR registers that we have to preload
    219 	// till the end of clause (in fact, beginning, because we're scheduling
    220 	// bottom-up)
    221 	value *current_ar;
    222 	value *current_pr;
    223 	// current values of CF_IDX registers that need preloading
    224 	value *current_idx[2];
    225 
    226 	alu_clause_tracker(shader &sh);
    227 
    228 	void reset();
    229 
    230 	// current group
    231 	alu_group_tracker& grp() { return group ? grp1 : grp0; }
    232 	// previous group
    233 	alu_group_tracker& prev_grp() { return group ? grp0 : grp1; }
    234 
    235 	void emit_group();
    236 	void emit_clause(container_node *c);
    237 	bool check_clause_limits();
    238 	void new_group();
    239 	bool is_empty();
    240 
    241 	alu_node* create_ar_load(value *v, chan_select ar_channel);
    242 
    243 	void discard_current_group();
    244 
    245 	unsigned total_slots() { return slot_count; }
    246 };
    247 
    248 class post_scheduler : public pass {
    249 
    250 	container_node ready, ready_copies; // alu only
    251 	container_node pending, bb_pending;
    252 	bb_node *cur_bb;
    253 	val_set live; // values live at the end of the alu clause
    254 	uc_map ucm;
    255 	alu_clause_tracker alu;
    256 
    257 	typedef std::map<sel_chan, value*> rv_map;
    258 	rv_map regmap, prev_regmap;
    259 
    260 	val_set cleared_interf;
    261 
    262 	void emit_index_registers();
    263 public:
    264 
    265 	post_scheduler(shader &sh) : pass(sh),
    266 		ready(), ready_copies(), pending(), cur_bb(),
    267 		live(), ucm(), alu(sh),	regmap(), cleared_interf() {}
    268 
    269 	virtual int run();
    270 	void run_on(container_node *n);
    271 	void schedule_bb(bb_node *bb);
    272 
    273 	void load_index_register(value *v, unsigned idx);
    274 	void process_fetch(container_node *c);
    275 
    276 	void process_alu(container_node *c);
    277 	void schedule_alu(container_node *c);
    278 	bool prepare_alu_group();
    279 
    280 	void release_op(node *n);
    281 
    282 	void release_src_values(node *n);
    283 	void release_src_vec(vvec &vv, bool src);
    284 	void release_src_val(value *v);
    285 
    286 	void init_uc_val(container_node *c, value *v);
    287 	void init_uc_vec(container_node *c, vvec &vv, bool src);
    288 	unsigned init_ucm(container_node *c, node *n);
    289 
    290 	void init_regmap();
    291 
    292 	bool check_interferences();
    293 
    294 	unsigned try_add_instruction(node *n);
    295 
    296 	bool check_copy(node *n);
    297 	void dump_group(alu_group_tracker &rt);
    298 
    299 	bool unmap_dst(alu_node *n);
    300 	bool unmap_dst_val(value *d);
    301 
    302 	bool map_src(alu_node *n);
    303 	bool map_src_vec(vvec &vv, bool src);
    304 	bool map_src_val(value *v);
    305 
    306 	bool recolor_local(value *v);
    307 
    308 	void update_local_interferences();
    309 	void update_live_src_vec(vvec &vv, val_set *born, bool src);
    310 	void update_live_dst_vec(vvec &vv);
    311 	void update_live(node *n, val_set *born);
    312 	void process_group();
    313 
    314 	void set_color_local_val(value *v, sel_chan color);
    315 	void set_color_local(value *v, sel_chan color);
    316 
    317 	void add_interferences(value *v, sb_bitset &rb, val_set &vs);
    318 
    319 	void init_globals(val_set &s, bool prealloc);
    320 
    321 	void recolor_locals();
    322 
    323 	void dump_regmap();
    324 
    325 	void emit_load_ar();
    326 	void emit_clause();
    327 
    328 	void process_ready_copies();
    329 };
    330 
    331 } // namespace r600_sb
    332 
    333 #endif /* SB_SCHED_H_ */
    334