Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #ifndef SB_SHADER_H_
     28 #define SB_SHADER_H_
     29 
     30 #include <list>
     31 #include <string>
     32 #include <map>
     33 
     34 #include "sb_ir.h"
     35 #include "sb_expr.h"
     36 
     37 namespace r600_sb {
     38 
// Description of one shader input; instances are stored in inputs_vec.
// The two fields correspond to the `comp_mask` and `preloaded` arguments of
// shader::add_input().
struct shader_input {
	// Component mask of the input. shader::add_input() defaults it to 0xF;
	// presumably one bit per register channel - TODO confirm.
	unsigned comp_mask;
	// Presumably non-zero when the input is preloaded (shader::add_input()
	// takes it as a bool) - TODO confirm.
	unsigned preloaded;
};
     43 
// One diagnostic record. Records are stored in error_map, which is keyed by
// node pointer.
struct error_info {
	// Node the record refers to.
	node *n;
	// NOTE(review): looks like the index of the offending operand of `n` -
	// confirm against the code that fills this in.
	unsigned arg_index;
	// Text of the diagnostic.
	std::string message;
};
     49 
// Diagnostics keyed by node; being a multimap, one node may carry several
// error_info records.
typedef std::multimap<node*, error_info> error_map;

// Forward declaration: this header only holds references to sb_context.
class sb_context;

// Container used by shader for its inputs (see shader::get_inputs()).
typedef std::vector<shader_input> inputs_vec;
// Container used by shader for its gpr_array pointers (see shader::arrays()).
typedef std::vector<gpr_array*> gpr_array_vec;
     56 
     57 struct ra_edge {
     58 	value *a, *b;
     59 	unsigned cost;
     60 
     61 	ra_edge(value *a, value *b, unsigned cost) : a(a), b(b), cost(cost) {}
     62 };
     63 
     64 enum chunk_flags {
     65 	RCF_GLOBAL = (1 << 0),
     66 	RCF_PIN_CHAN = (1 << 1),
     67 	RCF_PIN_REG = (1 << 2),
     68 
     69 	RCF_FIXED = (1 << 3),
     70 
     71 	RCF_PREALLOC = (1 << 4)
     72 };
     73 
     74 enum dce_flags {
     75 	DF_REMOVE_DEAD  = (1 << 0),
     76 	DF_REMOVE_UNUSED = (1 << 1),
     77 	DF_EXPAND = (1 << 2),
     78 };
     79 
     80 inline dce_flags operator |(dce_flags l, dce_flags r) {
     81 	return (dce_flags)((unsigned)l|(unsigned)r);
     82 }
     83 
     84 inline chunk_flags operator |(chunk_flags l, chunk_flags r) {
     85 	return (chunk_flags)((unsigned)l|(unsigned)r);
     86 }
     87 inline chunk_flags& operator |=(chunk_flags &l, chunk_flags r) {
     88 	l = l | r;
     89 	return l;
     90 }
     91 
     92 inline chunk_flags& operator &=(chunk_flags &l, chunk_flags r) {
     93 	l = (chunk_flags)((unsigned)l & (unsigned)r);
     94 	return l;
     95 }
     96 
     97 inline chunk_flags operator ~(chunk_flags r) {
     98 	return (chunk_flags)~(unsigned)r;
     99 }
    100 
    101 struct ra_chunk {
    102 	vvec values;
    103 	chunk_flags flags;
    104 	unsigned cost;
    105 	sel_chan pin;
    106 
    107 	ra_chunk() : values(), flags(), cost(), pin() {}
    108 
    109 	bool is_fixed() { return flags & RCF_FIXED; }
    110 	void fix() { flags |= RCF_FIXED; }
    111 
    112 	bool is_global() { return flags & RCF_GLOBAL; }
    113 	void set_global() {	flags |= RCF_GLOBAL; }
    114 
    115 	bool is_reg_pinned() { return flags & RCF_PIN_REG; }
    116 	bool is_chan_pinned() { return flags & RCF_PIN_CHAN; }
    117 
    118 	bool is_prealloc() { return flags & RCF_PREALLOC; }
    119 	void set_prealloc() { flags |= RCF_PREALLOC; }
    120 };
    121 
// Vector of ra_chunk pointers.
// NOTE(review): chunk_vec, declared further down in this header, names the
// same type.
typedef std::vector<ra_chunk*> chunk_vector;
    123 
    124 class ra_constraint {
    125 public:
    126 	ra_constraint(constraint_kind kind) : kind(kind), cost(0) {}
    127 
    128 	constraint_kind kind;
    129 	vvec values;
    130 	unsigned cost;
    131 
    132 	void update_values();
    133 	bool check();
    134 };
    135 
// Vector of ra_constraint pointers (type of coalescer::all_constraints).
typedef std::vector<ra_constraint*> constraint_vec;
// Vector of ra_chunk pointers (type of coalescer::all_chunks).
// NOTE(review): same type as chunk_vector declared earlier in this header.
typedef std::vector<ra_chunk*> chunk_vec;
    138 
    139 // priority queue
// FIXME: use something more suitable, or a custom class?
    141 
    142 template <class T>
    143 struct cost_compare {
    144 	bool operator ()(const T& t1, const T& t2) {
    145 		return t1->cost > t2->cost;
    146 	}
    147 };
    148 
    149 template <class T, class Comp>
    150 class queue {
    151 	typedef std::vector<T> container;
    152 	container cont;
    153 
    154 public:
    155 	queue() : cont() {}
    156 
    157 	typedef typename container::iterator iterator;
    158 
    159 	iterator begin() { return cont.begin(); }
    160 	iterator end() { return cont.end(); }
    161 
    162 	iterator insert(const T& t) {
    163 		iterator I = std::upper_bound(begin(), end(), t, Comp());
    164 		if (I == end())
    165 			cont.push_back(t);
    166 		else
    167 			cont.insert(I, t);
    168 
    169 		return I;
    170 	}
    171 
    172 	void erase(const T& t) {
    173 		std::pair<iterator, iterator> R =
    174 				std::equal_range(begin(), end(), t, Comp());
    175 		iterator F = std::find(R.first, R.second, t);
    176 		if (F != R.second)
    177 			cont.erase(F);
    178 	}
    179 };
    180 
// Sorted queues used by the coalescer; cost_compare puts the element with
// the greatest `cost` first.
typedef queue<ra_chunk*, cost_compare<ra_chunk*> > chunk_queue;
typedef queue<ra_edge*, cost_compare<ra_edge*> > edge_queue;
typedef queue<ra_constraint*, cost_compare<ra_constraint*> > constraint_queue;

// Set of ra_chunk pointers, ordered by pointer value.
typedef std::set<ra_chunk*> chunk_set;

// Forward declaration: coalescer below keeps a reference to its shader.
class shader;
    188 
// Builds edges, chunks and constraints over the values of one shader and
// assigns colors to them. Only the interface is declared here; all member
// functions except the constructor are defined outside this header, so the
// notes below are limited to what the declarations show.
class coalescer {

	// Shader this coalescer works on (stored by reference, not owned).
	shader &sh;

	// Work queues, each kept sorted by descending `cost` (see cost_compare).
	edge_queue edges;
	chunk_queue chunks;
	constraint_queue constraints;

	// NOTE(review): presumably every constraint/chunk ever created, kept so
	// that ~coalescer() can release them - confirm in the implementation.
	constraint_vec all_constraints;
	chunk_vec all_chunks;

public:

	// Binds the coalescer to `sh`; all queues and vectors start empty.
	coalescer(shader &sh) : sh(sh), edges(), chunks(), constraints() {}
	~coalescer();

	// Entry point. The meaning of the int result is not visible here -
	// TODO document once confirmed.
	int run();

	// Takes the same (a, b, cost) triple as the ra_edge constructor.
	void add_edge(value *a, value *b, unsigned cost);
	void build_chunks();
	void build_constraint_queue();
	void build_chunk_queue();
	int color_constraints();
	void color_chunks();

	ra_constraint* create_constraint(constraint_kind kind);

	// Named cost constants; presumably the values passed as `cost` to
	// add_edge() for phi-related and copy-related edges - TODO confirm.
	enum ac_cost {
		phi_cost = 10000,
		copy_cost = 1,
	};

	// Debug output helpers.
	void dump_edges();
	void dump_chunks();
	void dump_constraint_queue();

	static void dump_chunk(ra_chunk *c);
	static void dump_constraint(ra_constraint* c);

	// `s` is passed by non-const reference; presumably it receives the
	// interferences of chunk `c` - TODO confirm.
	void get_chunk_interferences(ra_chunk *c, val_set &s);

private:

	void create_chunk(value *v);
	void unify_chunks(ra_edge *e);
	bool chunks_interference(ra_chunk *c1, ra_chunk *c2);

	int color_reg_constraint(ra_constraint *c);
	void color_phi_constraint(ra_constraint *c);


	void init_reg_bitset(sb_bitset &bs, val_set &vs);

	void color_chunk(ra_chunk *c, sel_chan color);

	ra_chunk* detach_value(value *v);
};
    246 
    247 
    248 
// Central object for one shader: holds the value maps, pools, node list,
// inputs, regions, statistics and the coalescer, and provides factory
// functions for values and IR nodes. Only the interface is declared here;
// apart from a few inline accessors the member functions are defined outside
// this header, so the notes below are limited to what the declarations show.
class shader {

	// Owning context (stored by reference); returned by get_ctx().
	sb_context &ctx;

	typedef sb_map<uint32_t, value*> value_map;
	value_map reg_values;

	// read-only values
	value_map const_values; // immediate constants; key is the constant's value (uint32_t)
	value_map special_ro_values; // key is built from the hw alu_sel and chan
	value_map kcache_values;

	// Exposed through arrays().
	gpr_array_vec gpr_arrays;

	unsigned next_temp_value_index;

	unsigned prep_regs_count;

	// Two slots; presumably indexed by the `sel` argument of get_pred_sel()
	// - TODO confirm.
	value* pred_sels[2];

	// Exposed through get_regions() and get_inputs() respectively.
	regions_vec regions;
	inputs_vec inputs;

	// Presumably the cached result of get_undef_value() - TODO confirm.
	value *undef;

	// val_pool is exposed through get_value_pool() and indexed by
	// get_value_by_uid().
	sb_value_pool val_pool;
	sb_pool pool;

	std::vector<node*> all_nodes;

public:
	// Presumably statistics for the source and the optimized shader, filled
	// by collect_stats() - TODO confirm.
	shader_stats src_stats, opt_stats;

	error_map errors;

	bool optimized;

	unsigned id;

	coalescer coal;

	// NOTE(review): looks like the register id at which temporary values
	// start - confirm against create_temp_value().
	static const unsigned temp_regid_offset = 512;

	bbs_vec bbs;

	const shader_target target;

	value_table vt;
	expr_handler ex;

	container_node *root;

	bool compute_interferences;

	bool has_alu_predication;
	bool uses_gradients;

	bool safe_math;

	unsigned ngpr, nstack;

	// NOTE(review): this member has the same name as the dce_flags enum
	// declared at namespace scope, so inside this class the plain name
	// refers to the member; the type must be spelled `enum dce_flags`.
	unsigned dce_flags;

	shader(sb_context &sctx, shader_target t, unsigned id);

	~shader();

	sb_context &get_ctx() const { return ctx; }

	// Value lookup / creation helpers.
	value* get_const_value(const literal & v);
	value* get_special_value(unsigned sv_id, unsigned version = 0);
	value* create_temp_value();
	value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
                         unsigned version = 0);


	value* get_special_ro_value(unsigned sel);
	value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode);

	value* get_value_version(value* v, unsigned ver);

	void init();
	void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src);

	void dump_ir();

	void add_gpr_array(unsigned gpr_start, unsigned gpr_count,
	                   unsigned comp_mask);

	value* get_pred_sel(int sel);
	// `slots` is declared as an array of 5 alu_node pointers (it decays to
	// alu_node** as a parameter).
	bool assign_slot(alu_node *n, alu_node *slots[5]);

	gpr_array* get_gpr_array(unsigned reg, unsigned chan);

	// comp_mask defaults to 0xF and preloaded to false.
	void add_input(unsigned gpr, bool preloaded = false,
	               unsigned comp_mask = 0xF);

	// Read-only view of the inputs vector.
	const inputs_vec & get_inputs() {return inputs; }

	// Mutable access to the regions vector.
	regions_vec & get_regions() { return regions; }

	void init_call_fs(cf_node *cf);

	value *get_undef_value();
	void set_undef(val_set &s);

	// Node factory functions.
	node* create_node(node_type nt, node_subtype nst,
	                  node_flags flags = NF_EMPTY);
	alu_node* create_alu();
	alu_group_node* create_alu_group();
	alu_packed_node* create_alu_packed();
	cf_node* create_cf();
	cf_node* create_cf(unsigned op);
	fetch_node* create_fetch();
	region_node* create_region();
	depart_node* create_depart(region_node *target);
	repeat_node* create_repeat(region_node *target);
	container_node* create_container(node_type nt = NT_LIST,
	                                 node_subtype nst = NST_LIST,
	                                 node_flags flags = NF_EMPTY);
	if_node* create_if();
	bb_node* create_bb(unsigned id, unsigned loop_level);

	// Uids are 1-based: uid N maps to val_pool[N - 1]. `id` must not be 0,
	// since `id - 1` would wrap around for an unsigned argument.
	value* get_value_by_uid(unsigned id) { return val_pool[id - 1]; }

	cf_node* create_clause(node_subtype nst);

	void create_bbs();
	void expand_bbs();

	alu_node* create_mov(value* dst, value* src);
	// affcost defaults to 1, the same value as coalescer::copy_cost.
	alu_node* create_copy_mov(value *dst, value *src, unsigned affcost = 1);

	const char * get_shader_target_name();

	std::string get_full_target_name();

	// Overloads of create_bbs()/expand_bbs() taking the container and the
	// bbs_vec explicitly.
	void create_bbs(container_node* n, bbs_vec &bbs, int loop_level = 0);
	void expand_bbs(bbs_vec &bbs);

	sched_queue_id get_queue_id(node* n);

	void simplify_dep_rep(node *dr);

	unsigned first_temp_gpr();
	unsigned num_nontemp_gpr();

	// Mutable access to the gpr_array vector.
	gpr_array_vec& arrays() { return gpr_arrays; }

	void set_uses_kill();

	void fill_array_values(gpr_array *a, vvec &vv);

	alu_node* clone(alu_node *n);

	// Mutable access to the value pool.
	sb_value_pool& get_value_pool() { return val_pool; }

	void collect_stats(bool opt);

private:
	// Internal value creation / lookup helpers used by the public getters.
	value* create_value(value_kind k, sel_chan regid, unsigned ver);
	value* get_value(value_kind kind, sel_chan id,
	                         unsigned version = 0);
	value* get_ro_value(value_map &vm, value_kind vk, unsigned key);
};
    414 
    415 }
    416 
#endif /* SB_SHADER_H_ */
    418