Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #ifndef R600_SB_IR_H_
     28 #define R600_SB_IR_H_
     29 
#include <assert.h>
#include <stdint.h>
#include <string.h>

#include <algorithm>
#include <functional>
#include <iterator>
#include <list>
#include <set>
#include <utility>
#include <vector>

#include "sb_bc.h"
     37 
     38 namespace r600_sb {
     39 
     40 enum special_regs {
     41 	SV_ALU_PRED = 128,
     42 	SV_EXEC_MASK,
     43 	SV_AR_INDEX,
     44 	SV_VALID_MASK,
     45 	SV_GEOMETRY_EMIT
     46 };
     47 
     48 class node;
     49 class value;
     50 class shader;
     51 
     52 struct sel_chan
     53 {
     54 	unsigned id;
     55 
     56 	sel_chan(unsigned id = 0) : id(id) {}
     57 	sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {}
     58 
     59 	unsigned sel() const { return sel(id); }
     60 	unsigned chan() const {return chan(id); }
     61 	operator unsigned() const {return id;}
     62 
     63 	static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
     64 	static unsigned chan(unsigned idx) { return (idx-1) & 3; }
     65 
     66 	sel_chan(unsigned bank, unsigned index,
     67 			 unsigned chan, alu_kcache_index_mode index_mode)
     68 		: id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {}
     69 	unsigned kcache_index_mode() const { return sel() >> 28; }
     70 	unsigned kcache_sel() const { return sel() & 0x0fffffffu; }
     71 	unsigned kcache_bank() const { return kcache_sel() >> 12; }
     72 };
     73 
     74 inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
     75 	static const char * ch = "xyzw";
     76 	o << r.sel() << "." << ch[r.chan()];
     77 	return o;
     78 }
     79 
     80 typedef std::vector<value*>  vvec;
     81 
     82 class sb_pool {
     83 protected:
     84 	static const unsigned SB_POOL_ALIGN = 8;
     85 	static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1 << 16);
     86 
     87 	typedef std::vector<void*> block_vector;
     88 
     89 	unsigned block_size;
     90 	block_vector blocks;
     91 	unsigned total_size;
     92 
     93 public:
     94 	sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE)
     95 		: block_size(block_size), blocks(), total_size() {}
     96 
     97 	virtual ~sb_pool() { free_all(); }
     98 
     99 	void* allocate(unsigned sz);
    100 
    101 protected:
    102 	void free_all();
    103 };
    104 
    105 template <typename V, typename Comp = std::less<V> >
    106 class sb_set {
    107 	typedef std::vector<V> data_vector;
    108 	data_vector vec;
    109 public:
    110 
    111 	typedef typename data_vector::iterator iterator;
    112 	typedef typename data_vector::const_iterator const_iterator;
    113 
    114 	sb_set() : vec() {}
    115 	~sb_set() {  }
    116 
    117 	iterator begin() { return vec.begin(); }
    118 	iterator end() { return vec.end(); }
    119 	const_iterator begin() const { return vec.begin(); }
    120 	const_iterator end() const { return vec.end(); }
    121 
    122 	void add_set(const sb_set& s) {
    123 		data_vector t;
    124 		t.reserve(vec.size() + s.vec.size());
    125 		std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(),
    126 		          std::inserter(t, t.begin()), Comp());
    127 		vec.swap(t);
    128 	}
    129 
    130 	iterator lower_bound(const V& v) {
    131 		return std::lower_bound(vec.begin(), vec.end(), v, Comp());
    132 	}
    133 
    134 	std::pair<iterator, bool> insert(const V& v) {
    135 		iterator P = lower_bound(v);
    136 		if (P != vec.end() && is_equal(*P, v))
    137 			return std::make_pair(P, false);
    138 		return std::make_pair(vec.insert(P, v), true);
    139 	}
    140 
    141 	unsigned erase(const V&  v) {
    142 		iterator P = lower_bound(v);
    143 		if (P == vec.end() || !is_equal(*P, v))
    144 			return 0;
    145 		vec.erase(P);
    146 		return 1;
    147 	}
    148 
    149 	void clear() { vec.clear(); }
    150 
    151 	bool empty() { return vec.empty(); }
    152 
    153 	bool is_equal(const V& v1, const V& v2) {
    154 		return !Comp()(v1, v2) && !Comp()(v2, v1);
    155 	}
    156 
    157 	iterator find(const V& v) {
    158 		iterator P = lower_bound(v);
    159 		return (P != vec.end() && is_equal(*P, v)) ? P : vec.end();
    160 	}
    161 
    162 	unsigned size() { return vec.size(); }
    163 	void erase(iterator I) { vec.erase(I); }
    164 };
    165 
    166 template <typename K, typename V, typename KComp = std::less<K> >
    167 class sb_map {
    168 	typedef std::pair<K, V> datatype;
    169 
    170 	struct Comp {
    171 		bool operator()(const datatype &v1, const datatype &v2) {
    172 			return KComp()(v1.first, v2.first);
    173 		}
    174 	};
    175 
    176 	typedef sb_set<datatype, Comp> dataset;
    177 
    178 	dataset set;
    179 
    180 public:
    181 
    182 	sb_map() : set() {}
    183 
    184 	typedef typename dataset::iterator iterator;
    185 
    186 	iterator begin() { return set.begin(); }
    187 	iterator end() { return set.end(); }
    188 
    189 	void clear() { set.clear(); }
    190 
    191 	V& operator[](const K& key) {
    192 		datatype P = std::make_pair(key, V());
    193 		iterator F = set.find(P);
    194 		if (F == set.end()) {
    195 			return (*(set.insert(P).first)).second;
    196 		} else {
    197 			return (*F).second;
    198 		}
    199 	}
    200 
    201 	std::pair<iterator, bool> insert(const datatype& d) {
    202 		return set.insert(d);
    203 	}
    204 
    205 	iterator find(const K& key) {
    206 		return set.find(std::make_pair(key, V()));
    207 	}
    208 
    209 	unsigned erase(const K& key) {
    210 		return set.erase(std::make_pair(key, V()));
    211 	}
    212 
    213 	void erase(iterator I) {
    214 		set.erase(I);
    215 	}
    216 };
    217 
    218 class sb_bitset {
    219 	typedef uint32_t basetype;
    220 	static const unsigned bt_bits = sizeof(basetype) << 3;
    221 	std::vector<basetype> data;
    222 	unsigned bit_size;
    223 
    224 public:
    225 
    226 	sb_bitset() : data(), bit_size() {}
    227 
    228 	bool get(unsigned id);
    229 	void set(unsigned id, bool bit = true);
    230 	bool set_chk(unsigned id, bool bit = true);
    231 
    232 	void clear();
    233 	void resize(unsigned size);
    234 
    235 	unsigned size() { return bit_size; }
    236 
    237 	unsigned find_bit(unsigned start = 0);
    238 
    239 	void swap(sb_bitset & bs2);
    240 
    241 	bool operator==(const sb_bitset &bs2);
    242 	bool operator!=(const sb_bitset &bs2) { return !(*this == bs2); }
    243 
    244 	sb_bitset& operator|=(const sb_bitset &bs2) {
    245 		if (bit_size < bs2.bit_size) {
    246 			resize(bs2.bit_size);
    247 		}
    248 
    249 		for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
    250 				++i) {
    251 			data[i] |= bs2.data[i];
    252 		}
    253 		return *this;
    254 	}
    255 
    256 	sb_bitset& operator&=(const sb_bitset &bs2);
    257 	sb_bitset& mask(const sb_bitset &bs2);
    258 
    259 	friend sb_bitset operator|(const sb_bitset &b1, const sb_bitset &b2) {
    260 			sb_bitset nbs(b1);
    261 			nbs |= b2;
    262 			return nbs;
    263 	}
    264 };
    265 
    266 enum value_kind {
    267 	VLK_REG,
    268 	VLK_REL_REG,
    269 	VLK_SPECIAL_REG,
    270 	VLK_TEMP,
    271 
    272 	VLK_CONST,
    273 	VLK_KCACHE,
    274 	VLK_PARAM,
    275 	VLK_SPECIAL_CONST,
    276 
    277 	VLK_UNDEF
    278 };
    279 
    280 
    281 
    282 class sb_value_pool : protected sb_pool {
    283 	unsigned aligned_elt_size;
    284 
    285 public:
    286 	sb_value_pool(unsigned elt_size, unsigned block_elts = 256)
    287 		: sb_pool(block_elts * (aligned_elt_size = ((elt_size +
    288 				SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {}
    289 
    290 	virtual ~sb_value_pool() { delete_all(); }
    291 
    292 	value* create(value_kind k, sel_chan regid, unsigned ver);
    293 
    294 	value* operator[](unsigned id) {
    295 		unsigned offset = id * aligned_elt_size;
    296 		unsigned block_id;
    297 		if (offset < block_size) {
    298 			block_id = 0;
    299 		} else {
    300 			block_id = offset / block_size;
    301 			offset = offset % block_size;
    302 		}
    303 		return (value*)((char*)blocks[block_id] + offset);
    304 	}
    305 
    306 	unsigned size() { return total_size / aligned_elt_size; }
    307 
    308 protected:
    309 	void delete_all();
    310 };
    311 
    312 
    313 
    314 
    315 
    316 class sb_value_set {
    317 
    318 	sb_bitset bs;
    319 
    320 public:
    321 	sb_value_set() : bs() {}
    322 
    323 	class iterator {
    324 		sb_value_pool &vp;
    325 		sb_value_set *s;
    326 		unsigned nb;
    327 	public:
    328 		iterator(shader &sh, sb_value_set *s, unsigned nb = 0);
    329 
    330 
    331 		iterator& operator++() {
    332 			if (nb + 1 < s->bs.size())
    333 				nb = s->bs.find_bit(nb + 1);
    334 			else
    335 				nb = s->bs.size();
    336 			return *this;
    337 		}
    338 		bool operator !=(const iterator &i) {
    339 			return s != i.s || nb != i.nb;
    340 		}
    341 		bool operator ==(const iterator &i) { return !(*this != i); }
    342 		value* operator *() {
    343 			 return vp[nb];
    344 		}
    345 
    346 
    347 	};
    348 
    349 	iterator begin(shader &sh) {
    350 		return iterator(sh, this, bs.size() ? bs.find_bit(0) : 0);
    351 	}
    352 	iterator end(shader &sh) { return iterator(sh, this, bs.size()); }
    353 
    354 	bool add_set_checked(sb_value_set & s2);
    355 
    356 	void add_set(sb_value_set & s2)  {
    357 		if (bs.size() < s2.bs.size())
    358 			bs.resize(s2.bs.size());
    359 		bs |= s2.bs;
    360 	}
    361 
    362 	void remove_set(sb_value_set & s2);
    363 
    364 	bool add_vec(vvec &vv);
    365 
    366 	bool add_val(value *v);
    367 	bool contains(value *v);
    368 
    369 	bool remove_val(value *v);
    370 
    371 	bool remove_vec(vvec &vv);
    372 
    373 	void clear();
    374 
    375 	bool empty();
    376 };
    377 
    378 typedef sb_value_set val_set;
    379 
    380 struct gpr_array {
    381 	sel_chan base_gpr; // original gpr
    382 	sel_chan gpr; // assigned by regalloc
    383 	unsigned array_size;
    384 
    385 	gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr),
    386 			array_size(array_size) {}
    387 
    388 	unsigned hash() { return (base_gpr << 10) * array_size; }
    389 
    390 	val_set interferences;
    391 	vvec refs;
    392 
    393 	bool is_dead();
    394 
    395 };
    396 
    397 typedef std::vector<gpr_array*> regarray_vec;
    398 
    399 enum value_flags {
    400 	VLF_UNDEF = (1 << 0),
    401 	VLF_READONLY = (1 << 1),
    402 	VLF_DEAD = (1 << 2),
    403 
    404 	VLF_PIN_REG = (1 << 3),
    405 	VLF_PIN_CHAN = (1 << 4),
    406 
    407 	// opposite to alu clause local value - goes through alu clause boundary
    408 	// (can't use temp gpr, can't recolor in the alu scheduler, etc)
    409 	VLF_GLOBAL = (1 << 5),
    410 	VLF_FIXED = (1 << 6),
    411 	VLF_PVPS = (1 << 7),
    412 
    413 	VLF_PREALLOC = (1 << 8)
    414 };
    415 
    416 inline value_flags operator |(value_flags l, value_flags r) {
    417 	return (value_flags)((unsigned)l|(unsigned)r);
    418 }
    419 inline value_flags operator &(value_flags l, value_flags r) {
    420 	return (value_flags)((unsigned)l&(unsigned)r);
    421 }
    422 inline value_flags operator ~(value_flags l) {
    423 	return (value_flags)(~(unsigned)l);
    424 }
    425 inline value_flags& operator |=(value_flags &l, value_flags r) {
    426 	l = l | r;
    427 	return l;
    428 }
    429 inline value_flags& operator &=(value_flags &l, value_flags r) {
    430 	l = l & r;
    431 	return l;
    432 }
    433 
    434 sb_ostream& operator << (sb_ostream &o, value &v);
    435 
    436 typedef uint32_t value_hash;
    437 
    438 enum use_kind {
    439 	UK_SRC,
    440 	UK_SRC_REL,
    441 	UK_DST_REL,
    442 	UK_MAYDEF,
    443 	UK_MAYUSE,
    444 	UK_PRED,
    445 	UK_COND
    446 };
    447 
    448 struct use_info {
    449 	node *op;
    450 	use_kind kind;
    451 	int arg;
    452 
    453 	use_info(node *n, use_kind kind, int arg)
    454 		: op(n), kind(kind), arg(arg) {}
    455 };
    456 
    457 typedef std::list< use_info * > uselist;
    458 
    459 enum constraint_kind {
    460 	CK_SAME_REG,
    461 	CK_PACKED_BS,
    462 	CK_PHI
    463 };
    464 
    465 class shader;
    466 class sb_value_pool;
    467 struct ra_chunk;
    468 class ra_constraint;
    469 
    470 class value {
    471 protected:
    472 	value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0)
    473 		: kind(k), flags(),
    474 			rel(), array(),
    475 			version(ver), select(select), pin_gpr(select), gpr(),
    476 			gvn_source(), ghash(),
    477 			def(), adef(), uses(), constraint(), chunk(),
    478 			literal_value(), uid(sh_id) {}
    479 
    480 	~value() { delete_uses(); }
    481 
    482 	friend class sb_value_pool;
    483 public:
    484 	value_kind kind;
    485 	value_flags flags;
    486 
    487 	vvec mdef;
    488 	vvec muse;
    489 	value *rel;
    490 	gpr_array *array;
    491 
    492 	unsigned version;
    493 
    494 	sel_chan select;
    495 	sel_chan pin_gpr;
    496 	sel_chan gpr;
    497 
    498 	value *gvn_source;
    499 	value_hash ghash;
    500 
    501 	node *def, *adef;
    502 	uselist uses;
    503 
    504 	ra_constraint *constraint;
    505 	ra_chunk *chunk;
    506 
    507 	literal literal_value;
    508 
    509 	bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; }
    510 
    511 	bool is_AR() {
    512 		return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0);
    513 	}
    514 	bool is_geometry_emit() {
    515 		return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0);
    516 	}
    517 
    518 	node* any_def() {
    519 		assert(!(def && adef));
    520 		return def ? def : adef;
    521 	}
    522 
    523 	value* gvalue() {
    524 		value *v = this;
    525 		while (v->gvn_source && v != v->gvn_source)
    526 			// FIXME we really shouldn't have such chains
    527 			v = v->gvn_source;
    528 		return v;
    529 	}
    530 
    531 	bool is_float_0_or_1() {
    532 		value *v = gvalue();
    533 		return v->is_const() && (v->literal_value == literal(0)
    534 						|| v->literal_value == literal(1.0f));
    535 	}
    536 
    537 	bool is_undef() { return gvalue()->kind == VLK_UNDEF; }
    538 
    539 	bool is_any_gpr() {
    540 		return (kind == VLK_REG || kind == VLK_TEMP);
    541 	}
    542 
    543 	bool is_agpr() {
    544 		return array && is_any_gpr();
    545 	}
    546 
    547 	// scalar gpr, as opposed to element of gpr array
    548 	bool is_sgpr() {
    549 		return !array && is_any_gpr();
    550 	}
    551 
    552 	bool is_special_reg() {	return kind == VLK_SPECIAL_REG;	}
    553 	bool is_any_reg() { return is_any_gpr() || is_special_reg(); }
    554 	bool is_kcache() { return kind == VLK_KCACHE; }
    555 	bool is_rel() {	return kind == VLK_REL_REG; }
    556 	bool is_readonly() { return flags & VLF_READONLY; }
    557 
    558 	bool is_chan_pinned() { return flags & VLF_PIN_CHAN; }
    559 	bool is_reg_pinned() { return flags & VLF_PIN_REG; }
    560 
    561 	bool is_global();
    562 	void set_global();
    563 	void set_prealloc();
    564 
    565 	bool is_prealloc();
    566 
    567 	bool is_fixed();
    568 	void fix();
    569 
    570 	bool is_dead() { return flags & VLF_DEAD; }
    571 
    572 	literal & get_const_value() {
    573 		value *v = gvalue();
    574 		assert(v->is_const());
    575 		return v->literal_value;
    576 	}
    577 
    578 	// true if needs to be encoded as literal in alu
    579 	bool is_literal() {
    580 		return is_const()
    581 				&& literal_value != literal(0)
    582 				&& literal_value != literal(1)
    583 				&& literal_value != literal(-1)
    584 				&& literal_value != literal(0.5)
    585 				&& literal_value != literal(1.0);
    586 	}
    587 
    588 	void add_use(node *n, use_kind kind, int arg);
    589 	void remove_use(const node *n);
    590 
    591 	value_hash hash();
    592 	value_hash rel_hash();
    593 
    594 	void assign_source(value *v) {
    595 		assert(!gvn_source || gvn_source == this);
    596 		gvn_source = v->gvalue();
    597 	}
    598 
    599 	bool v_equal(value *v) { return gvalue() == v->gvalue(); }
    600 
    601 	unsigned use_count();
    602 	void delete_uses();
    603 
    604 	sel_chan get_final_gpr() {
    605 		if (array && array->gpr) {
    606 			int reg_offset = select.sel() - array->base_gpr.sel();
    607 			if (rel && rel->is_const())
    608 				reg_offset += rel->get_const_value().i;
    609 			return array->gpr + (reg_offset << 2);
    610 		} else {
    611 			return gpr;
    612 		}
    613 	}
    614 
    615 	unsigned get_final_chan() {
    616 		if (array) {
    617 			assert(array->gpr);
    618 			return array->gpr.chan();
    619 		} else {
    620 			assert(gpr);
    621 			return gpr.chan();
    622 		}
    623 	}
    624 
    625 	val_set interferences;
    626 	unsigned uid;
    627 };
    628 
    629 class expr_handler;
    630 
    631 class value_table {
    632 	typedef std::vector<value*> vt_item;
    633 	typedef std::vector<vt_item> vt_table;
    634 
    635 	expr_handler &ex;
    636 
    637 	unsigned size_bits;
    638 	unsigned size;
    639 	unsigned size_mask;
    640 
    641 	vt_table hashtable;
    642 
    643 	unsigned cnt;
    644 
    645 public:
    646 
    647 	value_table(expr_handler &ex, unsigned size_bits = 10)
    648 		: ex(ex), size_bits(size_bits), size(1u << size_bits),
    649 		  size_mask(size - 1), hashtable(size), cnt() {}
    650 
    651 	~value_table() {}
    652 
    653 	void add_value(value* v);
    654 
    655 	bool expr_equal(value* l, value* r);
    656 
    657 	unsigned count() { return cnt; }
    658 
    659 	void get_values(vvec & v);
    660 };
    661 
    662 class sb_context;
    663 
    664 enum node_type {
    665 	NT_UNKNOWN,
    666 	NT_LIST,
    667 	NT_OP,
    668 	NT_REGION,
    669 	NT_REPEAT,
    670 	NT_DEPART,
    671 	NT_IF,
    672 };
    673 
    674 enum node_subtype {
    675 	NST_UNKNOWN,
    676 	NST_LIST,
    677 	NST_ALU_GROUP,
    678 	NST_ALU_CLAUSE,
    679 	NST_ALU_INST,
    680 	NST_ALU_PACKED_INST,
    681 	NST_CF_INST,
    682 	NST_FETCH_INST,
    683 	NST_TEX_CLAUSE,
    684 	NST_VTX_CLAUSE,
    685 
    686 	NST_BB,
    687 
    688 	NST_PHI,
    689 	NST_PSI,
    690 	NST_COPY,
    691 
    692 	NST_LOOP_PHI_CONTAINER,
    693 	NST_LOOP_CONTINUE,
    694 	NST_LOOP_BREAK
    695 };
    696 
    697 enum node_flags {
    698 	NF_EMPTY = 0,
    699 	NF_DEAD = (1 << 0),
    700 	NF_REG_CONSTRAINT = (1 << 1),
    701 	NF_CHAN_CONSTRAINT = (1 << 2),
    702 	NF_ALU_4SLOT = (1 << 3),
    703 	NF_CONTAINER = (1 << 4),
    704 
    705 	NF_COPY_MOV = (1 << 5),
    706 
    707 	NF_DONT_KILL = (1 << 6),
    708 	NF_DONT_HOIST = (1 << 7),
    709 	NF_DONT_MOVE = (1 << 8),
    710 
    711 	// for KILLxx - we want to schedule them as early as possible
    712 	NF_SCHEDULE_EARLY = (1 << 9),
    713 
    714 	// for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
    715 	NF_ALU_STACK_WORKAROUND = (1 << 10)
    716 };
    717 
    718 inline node_flags operator |(node_flags l, node_flags r) {
    719 	return (node_flags)((unsigned)l|(unsigned)r);
    720 }
    721 inline node_flags& operator |=(node_flags &l, node_flags r) {
    722 	l = l | r;
    723 	return l;
    724 }
    725 
    726 inline node_flags& operator &=(node_flags &l, node_flags r) {
    727 	l = (node_flags)((unsigned)l & (unsigned)r);
    728 	return l;
    729 }
    730 
    731 inline node_flags operator ~(node_flags r) {
    732 	return (node_flags)~(unsigned)r;
    733 }
    734 
    735 struct node_stats {
    736 	unsigned alu_count;
    737 	unsigned alu_kill_count;
    738 	unsigned alu_copy_mov_count;
    739 	unsigned cf_count;
    740 	unsigned fetch_count;
    741 	unsigned region_count;
    742 	unsigned loop_count;
    743 	unsigned phi_count;
    744 	unsigned loop_phi_count;
    745 	unsigned depart_count;
    746 	unsigned repeat_count;
    747 	unsigned if_count;
    748 
    749 	node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(),
    750 			cf_count(), fetch_count(), region_count(),
    751 			loop_count(), phi_count(), loop_phi_count(), depart_count(),
    752 			repeat_count(), if_count() {}
    753 
    754 	void dump();
    755 };
    756 
    757 class shader;
    758 
    759 class vpass;
    760 
    761 class container_node;
    762 class region_node;
    763 
    764 class node {
    765 
    766 protected:
    767 	node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY)
    768 	: prev(), next(), parent(),
    769 	  type(nt), subtype(nst), flags(flags),
    770 	  pred(), dst(), src() {}
    771 
    772 	virtual ~node() {};
    773 
    774 public:
    775 	node *prev, *next;
    776 	container_node *parent;
    777 
    778 	node_type type;
    779 	node_subtype subtype;
    780 	node_flags flags;
    781 
    782 	value *pred;
    783 
    784 	vvec dst;
    785 	vvec src;
    786 
    787 	virtual bool is_valid() { return true; }
    788 	virtual bool accept(vpass &p, bool enter);
    789 
    790 	void insert_before(node *n);
    791 	void insert_after(node *n);
    792 	void replace_with(node *n);
    793 	void remove();
    794 
    795 	virtual value_hash hash() const;
    796 	value_hash hash_src() const;
    797 
    798 	virtual bool fold_dispatch(expr_handler *ex);
    799 
    800 	bool is_container() { return flags & NF_CONTAINER; }
    801 
    802 	bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; }
    803 	bool is_alu_inst() { return subtype == NST_ALU_INST; }
    804 	bool is_alu_group() { return subtype == NST_ALU_GROUP; }
    805 	bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
    806 
    807 	bool is_fetch_clause() {
    808 		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE;
    809 	}
    810 
    811 	bool is_copy() { return subtype == NST_COPY; }
    812 	bool is_copy_mov() { return flags & NF_COPY_MOV; }
    813 	bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); }
    814 
    815 	bool is_fetch_inst() { return subtype == NST_FETCH_INST; }
    816 	bool is_cf_inst() { return subtype == NST_CF_INST; }
    817 
    818 	bool is_region() { return type == NT_REGION; }
    819 	bool is_depart() { return type == NT_DEPART; }
    820 	bool is_repeat() { return type == NT_REPEAT; }
    821 	bool is_if() { return type == NT_IF; }
    822 	bool is_bb() { return subtype == NST_BB; }
    823 
    824 	bool is_phi() { return subtype == NST_PHI; }
    825 
    826 	bool is_dead() { return flags & NF_DEAD; }
    827 
    828 	bool is_cf_op(unsigned op);
    829 	bool is_alu_op(unsigned op);
    830 	bool is_fetch_op(unsigned op);
    831 
    832 	unsigned cf_op_flags();
    833 	unsigned alu_op_flags();
    834 	unsigned alu_op_slot_flags();
    835 	unsigned fetch_op_flags();
    836 
    837 	bool is_mova();
    838 	bool is_pred_set();
    839 
    840 	bool vec_uses_ar(vvec &vv) {
    841 		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
    842 			value *v = *I;
    843 			if (v && v->rel && !v->rel->is_const())
    844 				return true;
    845 		}
    846 		return false;
    847 	}
    848 
    849 	bool uses_ar() {
    850 		return vec_uses_ar(dst) || vec_uses_ar(src);
    851 	}
    852 
    853 
    854 	region_node* get_parent_region();
    855 
    856 	friend class shader;
    857 };
    858 
    859 class container_node : public node {
    860 public:
    861 
    862 	container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST,
    863 	               node_flags flags = NF_EMPTY)
    864 	: node(nt, nst, flags | NF_CONTAINER), first(), last(),
    865 	  live_after(), live_before() {}
    866 
    867 	// child items list
    868 	node *first, *last;
    869 
    870 	val_set live_after;
    871 	val_set live_before;
    872 
    873 	class iterator {
    874 		node *p;
    875 	public:
    876 		iterator(node *pp = NULL) : p(pp) {}
    877 		iterator & operator ++() { p = p->next; return *this;}
    878 		iterator & operator --() { p = p->prev; return *this;}
    879 		node* operator *() { return p; }
    880 		node* operator ->() { return p; }
    881 		const iterator advance(int n) {
    882 			if (!n) return *this;
    883 			iterator I(p);
    884 			if (n > 0) while (n--) ++I;
    885 			else while (n++) --I;
    886 			return I;
    887 		}
    888 		const iterator operator +(int n) { return advance(n); }
    889 		const iterator operator -(int n) { return advance(-n); }
    890 		bool operator !=(const iterator &i) { return p != i.p; }
    891 		bool operator ==(const iterator &i) { return p == i.p; }
    892 	};
    893 
    894 	class riterator {
    895 		iterator i;
    896 	public:
    897 		riterator(node *p = NULL) : i(p) {}
    898 		riterator & operator ++() { --i; return *this;}
    899 		riterator & operator --() { ++i; return *this;}
    900 		node* operator *() { return *i; }
    901 		node* operator ->() { return *i; }
    902 		bool operator !=(const riterator &r) { return i != r.i; }
    903 		bool operator ==(const riterator &r) { return i == r.i; }
    904 	};
    905 
    906 	iterator begin() { return first; }
    907 	iterator end() { return NULL; }
    908 	riterator rbegin() { return last; }
    909 	riterator rend() { return NULL; }
    910 
    911 	bool empty() { assert(first != NULL || first == last); return !first; }
    912 	unsigned count();
    913 
    914 	// used with node containers that represent shceduling queues
    915 	// ignores copies and takes into account alu_packed_node items
    916 	unsigned real_alu_count();
    917 
    918 	void push_back(node *n);
    919 	void push_front(node *n);
    920 
    921 	void insert_node_before(node *s, node *n);
    922 	void insert_node_after(node *s, node *n);
    923 
    924 	void append_from(container_node *c);
    925 
    926 	// remove range [b..e) from some container and assign to this container
    927 	void move(iterator b, iterator e);
    928 
    929 	void expand();
    930 	void expand(container_node *n);
    931 	void remove_node(node *n);
    932 
    933 	node *cut(iterator b, iterator e);
    934 
    935 	void clear() { first = last = NULL; }
    936 
    937 	virtual bool is_valid() { return true; }
    938 	virtual bool accept(vpass &p, bool enter);
    939 	virtual bool fold_dispatch(expr_handler *ex);
    940 
    941 	node* front() { return first; }
    942 	node* back() { return last; }
    943 
    944 	void collect_stats(node_stats &s);
    945 
    946 	friend class shader;
    947 
    948 
    949 };
    950 
    951 typedef container_node::iterator node_iterator;
    952 typedef container_node::riterator node_riterator;
    953 
    954 class alu_group_node : public container_node {
    955 protected:
    956 	alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {}
    957 public:
    958 
    959 	std::vector<literal> literals;
    960 
    961 	virtual bool is_valid() { return subtype == NST_ALU_GROUP; }
    962 	virtual bool accept(vpass &p, bool enter);
    963 
    964 
    965 	unsigned literal_chan(literal l) {
    966 		std::vector<literal>::iterator F =
    967 				std::find(literals.begin(), literals.end(), l);
    968 		assert(F != literals.end());
    969 		return F - literals.begin();
    970 	}
    971 
    972 	friend class shader;
    973 };
    974 
    975 class cf_node : public container_node {
    976 protected:
    977 	cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(),
    978 		jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); };
    979 public:
    980 	bc_cf bc;
    981 
    982 	cf_node *jump_target;
    983 	bool jump_after_target;
    984 
    985 	virtual bool is_valid() { return subtype == NST_CF_INST; }
    986 	virtual bool accept(vpass &p, bool enter);
    987 	virtual bool fold_dispatch(expr_handler *ex);
    988 
    989 	void jump(cf_node *c) { jump_target = c; jump_after_target = false; }
    990 	void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; }
    991 
    992 	friend class shader;
    993 };
    994 
    995 class alu_node : public node {
    996 protected:
    997 	alu_node() : node(NT_OP, NST_ALU_INST) { memset(&bc, 0, sizeof(bc_alu)); };
    998 public:
    999 	bc_alu bc;
   1000 
   1001 	virtual bool is_valid() { return subtype == NST_ALU_INST; }
   1002 	virtual bool accept(vpass &p, bool enter);
   1003 	virtual bool fold_dispatch(expr_handler *ex);
   1004 
   1005 	unsigned forced_bank_swizzle() {
   1006 		return ((bc.op_ptr->flags & AF_INTERP) && (bc.slot_flags == AF_4V)) ?
   1007 				VEC_210 : 0;
   1008 	}
   1009 
   1010 	// return param index + 1 if instruction references interpolation param,
   1011 	// otherwise 0
   1012 	unsigned interp_param();
   1013 
   1014 	alu_group_node *get_alu_group_node();
   1015 
   1016 	friend class shader;
   1017 };
   1018 
   1019 // for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later)
   1020 class alu_packed_node : public container_node {
   1021 protected:
   1022 	alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {}
   1023 public:
   1024 
   1025 	const alu_op_info* op_ptr() {
   1026 		return static_cast<alu_node*>(first)->bc.op_ptr;
   1027 	}
   1028 	unsigned op() { return static_cast<alu_node*>(first)->bc.op; }
   1029 	void init_args(bool repl);
   1030 
   1031 	virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; }
   1032 	virtual bool accept(vpass &p, bool enter);
   1033 	virtual bool fold_dispatch(expr_handler *ex);
   1034 
   1035 	unsigned get_slot_mask();
   1036 	void update_packed_items(sb_context &ctx);
   1037 
   1038 	friend class shader;
   1039 };
   1040 
   1041 class fetch_node : public node {
   1042 protected:
   1043 	fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); };
   1044 public:
   1045 	bc_fetch bc;
   1046 
   1047 	virtual bool is_valid() { return subtype == NST_FETCH_INST; }
   1048 	virtual bool accept(vpass &p, bool enter);
   1049 	virtual bool fold_dispatch(expr_handler *ex);
   1050 
   1051 	bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; }
   1052 
   1053 	friend class shader;
   1054 };
   1055 
   1056 class region_node;
   1057 
   1058 class repeat_node : public container_node {
   1059 protected:
   1060 	repeat_node(region_node *target, unsigned id)
   1061 	: container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {}
   1062 public:
   1063 	region_node *target;
   1064 	unsigned rep_id;
   1065 
   1066 	virtual bool accept(vpass &p, bool enter);
   1067 
   1068 	friend class shader;
   1069 };
   1070 
   1071 class depart_node : public container_node {
   1072 protected:
   1073 	depart_node(region_node *target, unsigned id)
   1074 	: container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {}
   1075 public:
   1076 	region_node *target;
   1077 	unsigned dep_id;
   1078 
   1079 	virtual bool accept(vpass &p, bool enter);
   1080 
   1081 	friend class shader;
   1082 };
   1083 
   1084 class if_node : public container_node {
   1085 protected:
   1086 	if_node() : container_node(NT_IF, NST_LIST), cond() {};
   1087 public:
   1088 	value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx
   1089 
   1090 	virtual bool accept(vpass &p, bool enter);
   1091 
   1092 	friend class shader;
   1093 };
   1094 
   1095 typedef std::vector<depart_node*> depart_vec;
   1096 typedef std::vector<repeat_node*> repeat_vec;
   1097 
   1098 class region_node : public container_node {
   1099 protected:
   1100 	region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id),
   1101 			loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop()
   1102 			{}
   1103 public:
   1104 	unsigned region_id;
   1105 
   1106 	container_node *loop_phi;
   1107 	container_node *phi;
   1108 
   1109 	val_set vars_defined;
   1110 
   1111 	depart_vec departs;
   1112 	repeat_vec repeats;
   1113 
   1114 	// true if region was created for loop in the parser, sometimes repeat_node
   1115 	// may be optimized away so we need to remember this information
   1116 	bool src_loop;
   1117 
   1118 	virtual bool accept(vpass &p, bool enter);
   1119 
   1120 	unsigned dep_count() { return departs.size(); }
   1121 	unsigned rep_count() { return repeats.size() + 1; }
   1122 
   1123 	bool is_loop() { return src_loop || !repeats.empty(); }
   1124 
   1125 	container_node* get_entry_code_location() {
   1126 		node *p = first;
   1127 		while (p && (p->is_depart() || p->is_repeat()))
   1128 			p = static_cast<container_node*>(p)->first;
   1129 
   1130 		container_node *c = static_cast<container_node*>(p);
   1131 		if (c->is_bb())
   1132 			return c;
   1133 		else
   1134 			return c->parent;
   1135 	}
   1136 
   1137 	void expand_depart(depart_node *d);
   1138 	void expand_repeat(repeat_node *r);
   1139 
   1140 	friend class shader;
   1141 };
   1142 
   1143 class bb_node : public container_node {
   1144 protected:
   1145 	bb_node(unsigned id, unsigned loop_level)
   1146 		: container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {}
   1147 public:
   1148 	unsigned id;
   1149 	unsigned loop_level;
   1150 
   1151 	virtual bool accept(vpass &p, bool enter);
   1152 
   1153 	friend class shader;
   1154 };
   1155 
   1156 
   1157 typedef std::vector<region_node*> regions_vec;
   1158 typedef std::vector<bb_node*> bbs_vec;
   1159 typedef std::list<node*> sched_queue;
   1160 typedef sched_queue::iterator sq_iterator;
   1161 typedef std::vector<node*> node_vec;
   1162 typedef std::list<node*> node_list;
   1163 typedef std::set<node*> node_set;
   1164 
   1165 
   1166 
   1167 } // namespace r600_sb
   1168 
   1169 #endif /* R600_SB_IR_H_ */
   1170