Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #ifndef R600_SB_IR_H_
     28 #define R600_SB_IR_H_
     29 
     30 #include <algorithm>
     31 #include <stdint.h>
     32 #include <vector>
     33 #include <set>
     34 #include <algorithm>
     35 
     36 #include "sb_bc.h"
     37 
     38 namespace r600_sb {
     39 
     40 enum special_regs {
     41 	SV_ALU_PRED = 128,
     42 	SV_EXEC_MASK,
     43 	SV_AR_INDEX,
     44 	SV_VALID_MASK,
     45 	SV_GEOMETRY_EMIT,
     46 	SV_LDS_RW,
     47 	SV_LDS_OQA,
     48 	SV_LDS_OQB,
     49 };
     50 
     51 class node;
     52 class value;
     53 class shader;
     54 
     55 struct sel_chan
     56 {
     57 	unsigned id;
     58 
     59 	sel_chan(unsigned id = 0) : id(id) {}
     60 	sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {}
     61 
     62 	unsigned sel() const { return sel(id); }
     63 	unsigned chan() const {return chan(id); }
     64 	operator unsigned() const {return id;}
     65 
     66 	static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
     67 	static unsigned chan(unsigned idx) { return (idx-1) & 3; }
     68 
     69 	sel_chan(unsigned bank, unsigned index,
     70 			 unsigned chan, alu_kcache_index_mode index_mode)
     71 		: id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {}
     72 	unsigned kcache_index_mode() const { return sel() >> 28; }
     73 	unsigned kcache_sel() const { return sel() & 0x0fffffffu; }
     74 	unsigned kcache_bank() const { return kcache_sel() >> 12; }
     75 };
     76 
     77 inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
     78 	static const char * ch = "xyzw";
     79 	o << r.sel() << "." << ch[r.chan()];
     80 	return o;
     81 }
     82 
     83 typedef std::vector<value*>  vvec;
     84 
     85 class sb_pool {
     86 protected:
     87 	static const unsigned SB_POOL_ALIGN = 8;
     88 	static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1 << 16);
     89 
     90 	typedef std::vector<void*> block_vector;
     91 
     92 	unsigned block_size;
     93 	block_vector blocks;
     94 	unsigned total_size;
     95 
     96 public:
     97 	sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE)
     98 		: block_size(block_size), blocks(), total_size() {}
     99 
    100 	virtual ~sb_pool() { free_all(); }
    101 
    102 	void* allocate(unsigned sz);
    103 
    104 protected:
    105 	void free_all();
    106 };
    107 
    108 template <typename V, typename Comp = std::less<V> >
    109 class sb_set {
    110 	typedef std::vector<V> data_vector;
    111 	data_vector vec;
    112 public:
    113 
    114 	typedef typename data_vector::iterator iterator;
    115 	typedef typename data_vector::const_iterator const_iterator;
    116 
    117 	sb_set() : vec() {}
    118 	~sb_set() {  }
    119 
    120 	iterator begin() { return vec.begin(); }
    121 	iterator end() { return vec.end(); }
    122 	const_iterator begin() const { return vec.begin(); }
    123 	const_iterator end() const { return vec.end(); }
    124 
    125 	void add_set(const sb_set& s) {
    126 		data_vector t;
    127 		t.reserve(vec.size() + s.vec.size());
    128 		std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(),
    129 		          std::inserter(t, t.begin()), Comp());
    130 		vec.swap(t);
    131 	}
    132 
    133 	iterator lower_bound(const V& v) {
    134 		return std::lower_bound(vec.begin(), vec.end(), v, Comp());
    135 	}
    136 
    137 	std::pair<iterator, bool> insert(const V& v) {
    138 		iterator P = lower_bound(v);
    139 		if (P != vec.end() && is_equal(*P, v))
    140 			return std::make_pair(P, false);
    141 		return std::make_pair(vec.insert(P, v), true);
    142 	}
    143 
    144 	unsigned erase(const V&  v) {
    145 		iterator P = lower_bound(v);
    146 		if (P == vec.end() || !is_equal(*P, v))
    147 			return 0;
    148 		vec.erase(P);
    149 		return 1;
    150 	}
    151 
    152 	void clear() { vec.clear(); }
    153 
    154 	bool empty() { return vec.empty(); }
    155 
    156 	bool is_equal(const V& v1, const V& v2) {
    157 		return !Comp()(v1, v2) && !Comp()(v2, v1);
    158 	}
    159 
    160 	iterator find(const V& v) {
    161 		iterator P = lower_bound(v);
    162 		return (P != vec.end() && is_equal(*P, v)) ? P : vec.end();
    163 	}
    164 
    165 	unsigned size() { return vec.size(); }
    166 	void erase(iterator I) { vec.erase(I); }
    167 };
    168 
    169 template <typename K, typename V, typename KComp = std::less<K> >
    170 class sb_map {
    171 	typedef std::pair<K, V> datatype;
    172 
    173 	struct Comp {
    174 		bool operator()(const datatype &v1, const datatype &v2) {
    175 			return KComp()(v1.first, v2.first);
    176 		}
    177 	};
    178 
    179 	typedef sb_set<datatype, Comp> dataset;
    180 
    181 	dataset set;
    182 
    183 public:
    184 
    185 	sb_map() : set() {}
    186 
    187 	typedef typename dataset::iterator iterator;
    188 
    189 	iterator begin() { return set.begin(); }
    190 	iterator end() { return set.end(); }
    191 
    192 	void clear() { set.clear(); }
    193 
    194 	V& operator[](const K& key) {
    195 		datatype P = std::make_pair(key, V());
    196 		iterator F = set.find(P);
    197 		if (F == set.end()) {
    198 			return (*(set.insert(P).first)).second;
    199 		} else {
    200 			return (*F).second;
    201 		}
    202 	}
    203 
    204 	std::pair<iterator, bool> insert(const datatype& d) {
    205 		return set.insert(d);
    206 	}
    207 
    208 	iterator find(const K& key) {
    209 		return set.find(std::make_pair(key, V()));
    210 	}
    211 
    212 	unsigned erase(const K& key) {
    213 		return set.erase(std::make_pair(key, V()));
    214 	}
    215 
    216 	void erase(iterator I) {
    217 		set.erase(I);
    218 	}
    219 };
    220 
    221 class sb_bitset {
    222 	typedef uint32_t basetype;
    223 	static const unsigned bt_bits = sizeof(basetype) << 3;
    224 	std::vector<basetype> data;
    225 	unsigned bit_size;
    226 
    227 public:
    228 
    229 	sb_bitset() : data(), bit_size() {}
    230 
    231 	bool get(unsigned id);
    232 	void set(unsigned id, bool bit = true);
    233 	bool set_chk(unsigned id, bool bit = true);
    234 
    235 	void clear();
    236 	void resize(unsigned size);
    237 
    238 	unsigned size() { return bit_size; }
    239 
    240 	unsigned find_bit(unsigned start = 0);
    241 
    242 	void swap(sb_bitset & bs2);
    243 
    244 	bool operator==(const sb_bitset &bs2);
    245 	bool operator!=(const sb_bitset &bs2) { return !(*this == bs2); }
    246 
    247 	sb_bitset& operator|=(const sb_bitset &bs2) {
    248 		if (bit_size < bs2.bit_size) {
    249 			resize(bs2.bit_size);
    250 		}
    251 
    252 		for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
    253 				++i) {
    254 			data[i] |= bs2.data[i];
    255 		}
    256 		return *this;
    257 	}
    258 
    259 	sb_bitset& operator&=(const sb_bitset &bs2);
    260 	sb_bitset& mask(const sb_bitset &bs2);
    261 
    262 	friend sb_bitset operator|(const sb_bitset &b1, const sb_bitset &b2) {
    263 			sb_bitset nbs(b1);
    264 			nbs |= b2;
    265 			return nbs;
    266 	}
    267 };
    268 
    269 enum value_kind {
    270 	VLK_REG,
    271 	VLK_REL_REG,
    272 	VLK_SPECIAL_REG,
    273 	VLK_TEMP,
    274 
    275 	VLK_CONST,
    276 	VLK_KCACHE,
    277 	VLK_PARAM,
    278 	VLK_SPECIAL_CONST,
    279 
    280 	VLK_UNDEF
    281 };
    282 
    283 
    284 
    285 class sb_value_pool : protected sb_pool {
    286 	unsigned aligned_elt_size;
    287 
    288 public:
    289 	sb_value_pool(unsigned elt_size, unsigned block_elts = 256)
    290 		: sb_pool(block_elts * (aligned_elt_size = ((elt_size +
    291 				SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {}
    292 
    293 	virtual ~sb_value_pool() { delete_all(); }
    294 
    295 	value* create(value_kind k, sel_chan regid, unsigned ver);
    296 
    297 	value* operator[](unsigned id) {
    298 		unsigned offset = id * aligned_elt_size;
    299 		unsigned block_id;
    300 		if (offset < block_size) {
    301 			block_id = 0;
    302 		} else {
    303 			block_id = offset / block_size;
    304 			offset = offset % block_size;
    305 		}
    306 		return (value*)((char*)blocks[block_id] + offset);
    307 	}
    308 
    309 	unsigned size() { return total_size / aligned_elt_size; }
    310 
    311 protected:
    312 	void delete_all();
    313 };
    314 
    315 
    316 
    317 
    318 
    319 class sb_value_set {
    320 
    321 	sb_bitset bs;
    322 
    323 public:
    324 	sb_value_set() : bs() {}
    325 
    326 	class iterator {
    327 		sb_value_pool &vp;
    328 		sb_value_set *s;
    329 		unsigned nb;
    330 	public:
    331 		iterator(shader &sh, sb_value_set *s, unsigned nb = 0);
    332 
    333 
    334 		iterator& operator++() {
    335 			if (nb + 1 < s->bs.size())
    336 				nb = s->bs.find_bit(nb + 1);
    337 			else
    338 				nb = s->bs.size();
    339 			return *this;
    340 		}
    341 		bool operator !=(const iterator &i) {
    342 			return s != i.s || nb != i.nb;
    343 		}
    344 		bool operator ==(const iterator &i) { return !(*this != i); }
    345 		value* operator *() {
    346 			 return vp[nb];
    347 		}
    348 
    349 
    350 	};
    351 
    352 	iterator begin(shader &sh) {
    353 		return iterator(sh, this, bs.size() ? bs.find_bit(0) : 0);
    354 	}
    355 	iterator end(shader &sh) { return iterator(sh, this, bs.size()); }
    356 
    357 	bool add_set_checked(sb_value_set & s2);
    358 
    359 	void add_set(sb_value_set & s2)  {
    360 		if (bs.size() < s2.bs.size())
    361 			bs.resize(s2.bs.size());
    362 		bs |= s2.bs;
    363 	}
    364 
    365 	void remove_set(sb_value_set & s2);
    366 
    367 	bool add_vec(vvec &vv);
    368 
    369 	bool add_val(value *v);
    370 	bool contains(value *v);
    371 
    372 	bool remove_val(value *v);
    373 
    374 	bool remove_vec(vvec &vv);
    375 
    376 	void clear();
    377 
    378 	bool empty();
    379 };
    380 
    381 typedef sb_value_set val_set;
    382 
    383 struct gpr_array {
    384 	sel_chan base_gpr; // original gpr
    385 	sel_chan gpr; // assigned by regalloc
    386 	unsigned array_size;
    387 
    388 	gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr),
    389 			array_size(array_size) {}
    390 
    391 	unsigned hash() { return (base_gpr << 10) * array_size; }
    392 
    393 	val_set interferences;
    394 	vvec refs;
    395 
    396 	bool is_dead();
    397 
    398 };
    399 
    400 typedef std::vector<gpr_array*> regarray_vec;
    401 
    402 enum value_flags {
    403 	VLF_UNDEF = (1 << 0),
    404 	VLF_READONLY = (1 << 1),
    405 	VLF_DEAD = (1 << 2),
    406 
    407 	VLF_PIN_REG = (1 << 3),
    408 	VLF_PIN_CHAN = (1 << 4),
    409 
    410 	// opposite to alu clause local value - goes through alu clause boundary
    411 	// (can't use temp gpr, can't recolor in the alu scheduler, etc)
    412 	VLF_GLOBAL = (1 << 5),
    413 	VLF_FIXED = (1 << 6),
    414 	VLF_PVPS = (1 << 7),
    415 
    416 	VLF_PREALLOC = (1 << 8)
    417 };
    418 
    419 inline value_flags operator |(value_flags l, value_flags r) {
    420 	return (value_flags)((unsigned)l|(unsigned)r);
    421 }
    422 inline value_flags operator &(value_flags l, value_flags r) {
    423 	return (value_flags)((unsigned)l&(unsigned)r);
    424 }
    425 inline value_flags operator ~(value_flags l) {
    426 	return (value_flags)(~(unsigned)l);
    427 }
    428 inline value_flags& operator |=(value_flags &l, value_flags r) {
    429 	l = l | r;
    430 	return l;
    431 }
    432 inline value_flags& operator &=(value_flags &l, value_flags r) {
    433 	l = l & r;
    434 	return l;
    435 }
    436 
    437 sb_ostream& operator << (sb_ostream &o, value &v);
    438 
    439 typedef uint32_t value_hash;
    440 
    441 typedef std::list< node * > uselist;
    442 
    443 enum constraint_kind {
    444 	CK_SAME_REG,
    445 	CK_PACKED_BS,
    446 	CK_PHI
    447 };
    448 
    449 class shader;
    450 class sb_value_pool;
    451 struct ra_chunk;
    452 class ra_constraint;
    453 
    454 class value {
    455 protected:
    456 	value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0)
    457 		: kind(k), flags(),
    458 			rel(), array(),
    459 			version(ver), select(select), pin_gpr(select), gpr(),
    460 			gvn_source(), ghash(),
    461 			def(), adef(), uses(), constraint(), chunk(),
    462 			literal_value(), uid(sh_id) {}
    463 
    464 	~value() { delete_uses(); }
    465 
    466 	friend class sb_value_pool;
    467 public:
    468 	value_kind kind;
    469 	value_flags flags;
    470 
    471 	vvec mdef;
    472 	vvec muse;
    473 	value *rel;
    474 	gpr_array *array;
    475 
    476 	unsigned version;
    477 
    478 	sel_chan select;
    479 	sel_chan pin_gpr;
    480 	sel_chan gpr;
    481 
    482 	value *gvn_source;
    483 	value_hash ghash;
    484 
    485 	node *def, *adef;
    486 	uselist uses;
    487 
    488 	ra_constraint *constraint;
    489 	ra_chunk *chunk;
    490 
    491 	literal literal_value;
    492 
    493 	bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; }
    494 
    495 	bool is_AR() {
    496 		return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0);
    497 	}
    498 	bool is_geometry_emit() {
    499 		return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0);
    500 	}
    501 	bool is_lds_access() {
    502 		return is_special_reg() && select == sel_chan(SV_LDS_RW, 0);
    503 	}
    504 	bool is_lds_oq() {
    505 		return is_special_reg() && (select == sel_chan(SV_LDS_OQA, 0) || select == sel_chan(SV_LDS_OQB, 0));
    506 	}
    507 
    508 	node* any_def() {
    509 		assert(!(def && adef));
    510 		return def ? def : adef;
    511 	}
    512 
    513 	value* gvalue() {
    514 		value *v = this;
    515 		while (v->gvn_source && v != v->gvn_source)
    516 			// FIXME we really shouldn't have such chains
    517 			v = v->gvn_source;
    518 		return v;
    519 	}
    520 
    521 	bool is_float_0_or_1() {
    522 		value *v = gvalue();
    523 		return v->is_const() && (v->literal_value == literal(0)
    524 						|| v->literal_value == literal(1.0f));
    525 	}
    526 
    527 	bool is_undef() { return gvalue()->kind == VLK_UNDEF; }
    528 
    529 	bool is_any_gpr() {
    530 		return (kind == VLK_REG || kind == VLK_TEMP);
    531 	}
    532 
    533 	bool is_agpr() {
    534 		return array && is_any_gpr();
    535 	}
    536 
    537 	// scalar gpr, as opposed to element of gpr array
    538 	bool is_sgpr() {
    539 		return !array && is_any_gpr();
    540 	}
    541 
    542 	bool is_special_reg() {	return kind == VLK_SPECIAL_REG;	}
    543 	bool is_any_reg() { return is_any_gpr() || is_special_reg(); }
    544 	bool is_kcache() { return kind == VLK_KCACHE; }
    545 	bool is_rel() {	return kind == VLK_REL_REG; }
    546 	bool is_readonly() { return flags & VLF_READONLY; }
    547 
    548 	bool is_chan_pinned() { return flags & VLF_PIN_CHAN; }
    549 	bool is_reg_pinned() { return flags & VLF_PIN_REG; }
    550 
    551 	bool is_global();
    552 	void set_global();
    553 	void set_prealloc();
    554 
    555 	bool is_prealloc();
    556 
    557 	bool is_fixed();
    558 	void fix();
    559 
    560 	bool is_dead() { return flags & VLF_DEAD; }
    561 
    562 	literal & get_const_value() {
    563 		value *v = gvalue();
    564 		assert(v->is_const());
    565 		return v->literal_value;
    566 	}
    567 
    568 	// true if needs to be encoded as literal in alu
    569 	bool is_literal() {
    570 		return is_const()
    571 				&& literal_value != literal(0)
    572 				&& literal_value != literal(1)
    573 				&& literal_value != literal(-1)
    574 				&& literal_value != literal(0.5)
    575 				&& literal_value != literal(1.0);
    576 	}
    577 
    578 	void add_use(node *n);
    579 	void remove_use(const node *n);
    580 
    581 	value_hash hash();
    582 	value_hash rel_hash();
    583 
    584 	void assign_source(value *v) {
    585 		assert(!gvn_source || gvn_source == this);
    586 		gvn_source = v->gvalue();
    587 	}
    588 
    589 	bool v_equal(value *v) { return gvalue() == v->gvalue(); }
    590 
    591 	unsigned use_count();
    592 	void delete_uses();
    593 
    594 	sel_chan get_final_gpr() {
    595 		if (array && array->gpr) {
    596 			int reg_offset = select.sel() - array->base_gpr.sel();
    597 			if (rel && rel->is_const())
    598 				reg_offset += rel->get_const_value().i;
    599 			return array->gpr + (reg_offset << 2);
    600 		} else {
    601 			return gpr;
    602 		}
    603 	}
    604 
    605 	unsigned get_final_chan() {
    606 		if (array) {
    607 			assert(array->gpr);
    608 			return array->gpr.chan();
    609 		} else {
    610 			assert(gpr);
    611 			return gpr.chan();
    612 		}
    613 	}
    614 
    615 	val_set interferences;
    616 	unsigned uid;
    617 };
    618 
    619 class expr_handler;
    620 
    621 class value_table {
    622 	typedef std::vector<value*> vt_item;
    623 	typedef std::vector<vt_item> vt_table;
    624 
    625 	expr_handler &ex;
    626 
    627 	unsigned size_bits;
    628 	unsigned size;
    629 	unsigned size_mask;
    630 
    631 	vt_table hashtable;
    632 
    633 	unsigned cnt;
    634 
    635 public:
    636 
    637 	value_table(expr_handler &ex, unsigned size_bits = 10)
    638 		: ex(ex), size_bits(size_bits), size(1u << size_bits),
    639 		  size_mask(size - 1), hashtable(size), cnt() {}
    640 
    641 	~value_table() {}
    642 
    643 	void add_value(value* v);
    644 
    645 	bool expr_equal(value* l, value* r);
    646 
    647 	unsigned count() { return cnt; }
    648 
    649 	void get_values(vvec & v);
    650 };
    651 
    652 class sb_context;
    653 
    654 enum node_type {
    655 	NT_UNKNOWN,
    656 	NT_LIST,
    657 	NT_OP,
    658 	NT_REGION,
    659 	NT_REPEAT,
    660 	NT_DEPART,
    661 	NT_IF,
    662 };
    663 
    664 enum node_subtype {
    665 	NST_UNKNOWN,
    666 	NST_LIST,
    667 	NST_ALU_GROUP,
    668 	NST_ALU_CLAUSE,
    669 	NST_ALU_INST,
    670 	NST_ALU_PACKED_INST,
    671 	NST_CF_INST,
    672 	NST_FETCH_INST,
    673 	NST_TEX_CLAUSE,
    674 	NST_VTX_CLAUSE,
    675 	NST_GDS_CLAUSE,
    676 
    677 	NST_BB,
    678 
    679 	NST_PHI,
    680 	NST_PSI,
    681 	NST_COPY,
    682 
    683 	NST_LOOP_PHI_CONTAINER,
    684 	NST_LOOP_CONTINUE,
    685 	NST_LOOP_BREAK
    686 };
    687 
    688 enum node_flags {
    689 	NF_EMPTY = 0,
    690 	NF_DEAD = (1 << 0),
    691 	NF_REG_CONSTRAINT = (1 << 1),
    692 	NF_CHAN_CONSTRAINT = (1 << 2),
    693 	NF_ALU_4SLOT = (1 << 3),
    694 	NF_CONTAINER = (1 << 4),
    695 
    696 	NF_COPY_MOV = (1 << 5),
    697 
    698 	NF_DONT_KILL = (1 << 6),
    699 	NF_DONT_HOIST = (1 << 7),
    700 	NF_DONT_MOVE = (1 << 8),
    701 
    702 	// for KILLxx - we want to schedule them as early as possible
    703 	NF_SCHEDULE_EARLY = (1 << 9),
    704 
    705 	// for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
    706 	NF_ALU_STACK_WORKAROUND = (1 << 10)
    707 };
    708 
    709 inline node_flags operator |(node_flags l, node_flags r) {
    710 	return (node_flags)((unsigned)l|(unsigned)r);
    711 }
    712 inline node_flags& operator |=(node_flags &l, node_flags r) {
    713 	l = l | r;
    714 	return l;
    715 }
    716 
    717 inline node_flags& operator &=(node_flags &l, node_flags r) {
    718 	l = (node_flags)((unsigned)l & (unsigned)r);
    719 	return l;
    720 }
    721 
    722 inline node_flags operator ~(node_flags r) {
    723 	return (node_flags)~(unsigned)r;
    724 }
    725 
    726 struct node_stats {
    727 	unsigned alu_count;
    728 	unsigned alu_kill_count;
    729 	unsigned alu_copy_mov_count;
    730 	unsigned cf_count;
    731 	unsigned fetch_count;
    732 	unsigned region_count;
    733 	unsigned loop_count;
    734 	unsigned phi_count;
    735 	unsigned loop_phi_count;
    736 	unsigned depart_count;
    737 	unsigned repeat_count;
    738 	unsigned if_count;
    739        bool uses_ar;
    740 
    741 	node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(),
    742 			cf_count(), fetch_count(), region_count(),
    743 			loop_count(), phi_count(), loop_phi_count(), depart_count(),
    744                        repeat_count(), if_count(), uses_ar(false) {}
    745 
    746 	void dump();
    747 };
    748 
    749 class shader;
    750 
    751 class vpass;
    752 
    753 class container_node;
    754 class region_node;
    755 
    756 class node {
    757 
    758 protected:
    759 	node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY)
    760 	: prev(), next(), parent(),
    761 	  type(nt), subtype(nst), flags(flags),
    762 	  pred(), dst(), src() {}
    763 
    764 	virtual ~node() {};
    765 
    766 public:
    767 	node *prev, *next;
    768 	container_node *parent;
    769 
    770 	node_type type;
    771 	node_subtype subtype;
    772 	node_flags flags;
    773 
    774 	value *pred;
    775 
    776 	vvec dst;
    777 	vvec src;
    778 
    779 	virtual bool is_valid() { return true; }
    780 	virtual bool accept(vpass &p, bool enter);
    781 
    782 	void insert_before(node *n);
    783 	void insert_after(node *n);
    784 	void replace_with(node *n);
    785 	void remove();
    786 
    787 	virtual value_hash hash() const;
    788 	value_hash hash_src() const;
    789 
    790 	virtual bool fold_dispatch(expr_handler *ex);
    791 
    792 	bool is_container() { return flags & NF_CONTAINER; }
    793 
    794 	bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; }
    795 	bool is_alu_inst() { return subtype == NST_ALU_INST; }
    796 	bool is_alu_group() { return subtype == NST_ALU_GROUP; }
    797 	bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
    798 
    799 	bool is_fetch_clause() {
    800 		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE;
    801 	}
    802 
    803 	bool is_copy() { return subtype == NST_COPY; }
    804 	bool is_copy_mov() { return flags & NF_COPY_MOV; }
    805 	bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); }
    806 
    807 	bool is_fetch_inst() { return subtype == NST_FETCH_INST; }
    808 	bool is_cf_inst() { return subtype == NST_CF_INST; }
    809 
    810 	bool is_region() { return type == NT_REGION; }
    811 	bool is_depart() { return type == NT_DEPART; }
    812 	bool is_repeat() { return type == NT_REPEAT; }
    813 	bool is_if() { return type == NT_IF; }
    814 	bool is_bb() { return subtype == NST_BB; }
    815 
    816 	bool is_phi() { return subtype == NST_PHI; }
    817 
    818 	bool is_dead() { return flags & NF_DEAD; }
    819 
    820 	bool is_cf_op(unsigned op);
    821 	bool is_alu_op(unsigned op);
    822 	bool is_fetch_op(unsigned op);
    823 
    824 	unsigned cf_op_flags();
    825 	unsigned alu_op_flags();
    826 	unsigned alu_op_slot_flags();
    827 	unsigned fetch_op_flags();
    828 
    829 	bool is_mova();
    830 	bool is_pred_set();
    831 
    832 	bool vec_uses_ar(vvec &vv) {
    833 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
    834 			value *v = *I;
    835 			if (v && v->rel && !v->rel->is_const())
    836 				return true;
    837 		}
    838 		return false;
    839 	}
    840 
    841 	bool uses_ar() {
    842 		return vec_uses_ar(dst) || vec_uses_ar(src);
    843 	}
    844 
    845 	bool vec_uses_lds_oq(vvec &vv) {
    846 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
    847 			value *v = *I;
    848 			if (v && v->is_lds_oq())
    849 				return true;
    850 		}
    851 		return false;
    852 	}
    853 
    854 	bool consumes_lds_oq() {
    855 		return vec_uses_lds_oq(src);
    856 	}
    857 
    858 	bool produces_lds_oq() {
    859 		return vec_uses_lds_oq(dst);
    860 	}
    861 
    862 	region_node* get_parent_region();
    863 
    864 	friend class shader;
    865 };
    866 
    867 class container_node : public node {
    868 public:
    869 
    870 	container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST,
    871 	               node_flags flags = NF_EMPTY)
    872 	: node(nt, nst, flags | NF_CONTAINER), first(), last(),
    873 	  live_after(), live_before() {}
    874 
    875 	// child items list
    876 	node *first, *last;
    877 
    878 	val_set live_after;
    879 	val_set live_before;
    880 
    881 	class iterator {
    882 		node *p;
    883 	public:
    884 		iterator(node *pp = NULL) : p(pp) {}
    885 		iterator & operator ++() { p = p->next; return *this;}
    886 		iterator & operator --() { p = p->prev; return *this;}
    887 		node* operator *() { return p; }
    888 		node* operator ->() { return p; }
    889 		const iterator advance(int n) {
    890 			if (!n) return *this;
    891 			iterator I(p);
    892 			if (n > 0) while (n--) ++I;
    893 			else while (n++) --I;
    894 			return I;
    895 		}
    896 		const iterator operator +(int n) { return advance(n); }
    897 		const iterator operator -(int n) { return advance(-n); }
    898 		bool operator !=(const iterator &i) { return p != i.p; }
    899 		bool operator ==(const iterator &i) { return p == i.p; }
    900 	};
    901 
    902 	class riterator {
    903 		iterator i;
    904 	public:
    905 		riterator(node *p = NULL) : i(p) {}
    906 		riterator & operator ++() { --i; return *this;}
    907 		riterator & operator --() { ++i; return *this;}
    908 		node* operator *() { return *i; }
    909 		node* operator ->() { return *i; }
    910 		bool operator !=(const riterator &r) { return i != r.i; }
    911 		bool operator ==(const riterator &r) { return i == r.i; }
    912 	};
    913 
    914 	iterator begin() { return first; }
    915 	iterator end() { return NULL; }
    916 	riterator rbegin() { return last; }
    917 	riterator rend() { return NULL; }
    918 
    919 	bool empty() { assert(first != NULL || first == last); return !first; }
    920 	unsigned count();
    921 
    922 	// used with node containers that represent shceduling queues
    923 	// ignores copies and takes into account alu_packed_node items
    924 	unsigned real_alu_count();
    925 
    926 	void push_back(node *n);
    927 	void push_front(node *n);
    928 
    929 	void insert_node_before(node *s, node *n);
    930 	void insert_node_after(node *s, node *n);
    931 
    932 	void append_from(container_node *c);
    933 
    934 	// remove range [b..e) from some container and assign to this container
    935 	void move(iterator b, iterator e);
    936 
    937 	void expand();
    938 	void expand(container_node *n);
    939 	void remove_node(node *n);
    940 
    941 	node *cut(iterator b, iterator e);
    942 
    943 	void clear() { first = last = NULL; }
    944 
    945 	virtual bool is_valid() { return true; }
    946 	virtual bool accept(vpass &p, bool enter);
    947 	virtual bool fold_dispatch(expr_handler *ex);
    948 
    949 	node* front() { return first; }
    950 	node* back() { return last; }
    951 
    952 	void collect_stats(node_stats &s);
    953 
    954 	friend class shader;
    955 
    956 
    957 };
    958 
    959 typedef container_node::iterator node_iterator;
    960 typedef container_node::riterator node_riterator;
    961 
    962 class alu_group_node : public container_node {
    963 protected:
    964 	alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {}
    965 public:
    966 
    967 	std::vector<literal> literals;
    968 
    969 	virtual bool is_valid() { return subtype == NST_ALU_GROUP; }
    970 	virtual bool accept(vpass &p, bool enter);
    971 
    972 
    973 	unsigned literal_chan(literal l) {
    974 		std::vector<literal>::iterator F =
    975 				std::find(literals.begin(), literals.end(), l);
    976 		assert(F != literals.end());
    977 		return F - literals.begin();
    978 	}
    979 
    980 	friend class shader;
    981 };
    982 
    983 class cf_node : public container_node {
    984 protected:
    985 	cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(),
    986 		jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); };
    987 public:
    988 	bc_cf bc;
    989 
    990 	cf_node *jump_target;
    991 	bool jump_after_target;
    992 
    993 	virtual bool is_valid() { return subtype == NST_CF_INST; }
    994 	virtual bool accept(vpass &p, bool enter);
    995 	virtual bool fold_dispatch(expr_handler *ex);
    996 
    997 	void jump(cf_node *c) { jump_target = c; jump_after_target = false; }
    998 	void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; }
    999 
   1000 	friend class shader;
   1001 };
   1002 
   1003 class alu_node : public node {
   1004 protected:
   1005 	alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); };
   1006 public:
   1007 	bc_alu bc;
   1008 
   1009 	virtual bool is_valid() { return subtype == NST_ALU_INST; }
   1010 	virtual bool accept(vpass &p, bool enter);
   1011 	virtual bool fold_dispatch(expr_handler *ex);
   1012 
   1013 	unsigned forced_bank_swizzle() {
   1014 		return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ?
   1015 				VEC_210 : 0;
   1016 	}
   1017 
   1018 	// return param index + 1 if instruction references interpolation param,
   1019 	// otherwise 0
   1020 	unsigned interp_param();
   1021 
   1022 	alu_group_node *get_alu_group_node();
   1023 
   1024 	friend class shader;
   1025 };
   1026 
   1027 // for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later)
   1028 class alu_packed_node : public container_node {
   1029 protected:
   1030 	alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {}
   1031 public:
   1032 
   1033 	const alu_op_info* op_ptr() {
   1034 		return static_cast<alu_node*>(first)->bc.op_ptr;
   1035 	}
   1036 	unsigned op() { return static_cast<alu_node*>(first)->bc.op; }
   1037 	void init_args(bool repl);
   1038 
   1039 	virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; }
   1040 	virtual bool accept(vpass &p, bool enter);
   1041 	virtual bool fold_dispatch(expr_handler *ex);
   1042 
   1043 	unsigned get_slot_mask();
   1044 	void update_packed_items(sb_context &ctx);
   1045 
   1046 	friend class shader;
   1047 };
   1048 
   1049 class fetch_node : public node {
   1050 protected:
   1051 	fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); };
   1052 public:
   1053 	bc_fetch bc;
   1054 
   1055 	virtual bool is_valid() { return subtype == NST_FETCH_INST; }
   1056 	virtual bool accept(vpass &p, bool enter);
   1057 	virtual bool fold_dispatch(expr_handler *ex);
   1058 
   1059 	bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; }
   1060 
   1061 	friend class shader;
   1062 };
   1063 
   1064 class region_node;
   1065 
   1066 class repeat_node : public container_node {
   1067 protected:
   1068 	repeat_node(region_node *target, unsigned id)
   1069 	: container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {}
   1070 public:
   1071 	region_node *target;
   1072 	unsigned rep_id;
   1073 
   1074 	virtual bool accept(vpass &p, bool enter);
   1075 
   1076 	friend class shader;
   1077 };
   1078 
   1079 class depart_node : public container_node {
   1080 protected:
   1081 	depart_node(region_node *target, unsigned id)
   1082 	: container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {}
   1083 public:
   1084 	region_node *target;
   1085 	unsigned dep_id;
   1086 
   1087 	virtual bool accept(vpass &p, bool enter);
   1088 
   1089 	friend class shader;
   1090 };
   1091 
   1092 class if_node : public container_node {
   1093 protected:
   1094 	if_node() : container_node(NT_IF, NST_LIST), cond() {};
   1095 public:
   1096 	value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx
   1097 
   1098 	virtual bool accept(vpass &p, bool enter);
   1099 
   1100 	friend class shader;
   1101 };
   1102 
   1103 typedef std::vector<depart_node*> depart_vec;
   1104 typedef std::vector<repeat_node*> repeat_vec;
   1105 
   1106 class region_node : public container_node {
   1107 protected:
   1108 	region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id),
   1109 			loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop()
   1110 			{}
   1111 public:
   1112 	unsigned region_id;
   1113 
   1114 	container_node *loop_phi;
   1115 	container_node *phi;
   1116 
   1117 	val_set vars_defined;
   1118 
   1119 	depart_vec departs;
   1120 	repeat_vec repeats;
   1121 
   1122 	// true if region was created for loop in the parser, sometimes repeat_node
   1123 	// may be optimized away so we need to remember this information
   1124 	bool src_loop;
   1125 
   1126 	virtual bool accept(vpass &p, bool enter);
   1127 
   1128 	unsigned dep_count() { return departs.size(); }
   1129 	unsigned rep_count() { return repeats.size() + 1; }
   1130 
   1131 	bool is_loop() { return src_loop || !repeats.empty(); }
   1132 
   1133 	container_node* get_entry_code_location() {
   1134 		node *p = first;
   1135 		while (p && (p->is_depart() || p->is_repeat()))
   1136 			p = static_cast<container_node*>(p)->first;
   1137 
   1138 		container_node *c = static_cast<container_node*>(p);
   1139 		if (c->is_bb())
   1140 			return c;
   1141 		else
   1142 			return c->parent;
   1143 	}
   1144 
   1145 	void expand_depart(depart_node *d);
   1146 	void expand_repeat(repeat_node *r);
   1147 
   1148 	friend class shader;
   1149 };
   1150 
   1151 class bb_node : public container_node {
   1152 protected:
   1153 	bb_node(unsigned id, unsigned loop_level)
   1154 		: container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {}
   1155 public:
   1156 	unsigned id;
   1157 	unsigned loop_level;
   1158 
   1159 	virtual bool accept(vpass &p, bool enter);
   1160 
   1161 	friend class shader;
   1162 };
   1163 
   1164 
   1165 typedef std::vector<region_node*> regions_vec;
   1166 typedef std::vector<bb_node*> bbs_vec;
   1167 typedef std::list<node*> sched_queue;
   1168 typedef sched_queue::iterator sq_iterator;
   1169 typedef std::vector<node*> node_vec;
   1170 typedef std::list<node*> node_list;
   1171 typedef std::set<node*> node_set;
   1172 
   1173 
   1174 
   1175 } // namespace r600_sb
   1176 
   1177 #endif /* R600_SB_IR_H_ */
   1178