Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #ifndef SB_PASS_H_
     28 #define SB_PASS_H_
     29 
     30 #include <stack>
     31 
     32 namespace r600_sb {
     33 
     34 class pass {
        	// Common base of the pass classes declared in this header. It stores a
        	// reference to the shader being processed and a reference to an
        	// sb_context, and exposes a virtual run() entry point.
        	// NOTE(review): the constructor is defined out of line; ctx is presumably
        	// taken from the shader there - confirm against the definition.
     35 protected:
     36 	sb_context &ctx;
     37 	shader &sh;
     38 
     39 public:
     40 	pass(shader &s);
     41 
        	// Entry point that derived classes override. The meaning of the int
        	// result is not visible in this header.
     42 	virtual int run();
     43 
        	// Virtual so that derived passes can be destroyed through a pass pointer.
     44 	virtual ~pass() {}
     45 };
     46 
     47 class vpass : public pass {
        	// A pass with one virtual visit() overload per node type. It also
        	// declares virtual init()/done(), overrides run(), and adds run_on()
        	// taking a container_node.
        	// NOTE(review): the names suggest that run() walks the node tree and
        	// calls visit() with enter=true before and enter=false after a node's
        	// children; the definitions are not in this header - confirm there.
     48 
     49 public:
     50 
     51 	vpass(shader &s) : pass(s) {}
     52 
     53 	virtual int init();
     54 	virtual int done();
     55 
     56 	virtual int run();
     57 	virtual void run_on(container_node &n);
     58 
        	// One overload per node class. Derived classes that override only some
        	// of them use "using vpass::visit;" to keep the rest visible.
        	// The meaning of the bool result is not visible in this header.
     59 	virtual bool visit(node &n, bool enter);
     60 	virtual bool visit(container_node &n, bool enter);
     61 	virtual bool visit(alu_group_node &n, bool enter);
     62 	virtual bool visit(cf_node &n, bool enter);
     63 	virtual bool visit(alu_node &n, bool enter);
     64 	virtual bool visit(alu_packed_node &n, bool enter);
     65 	virtual bool visit(fetch_node &n, bool enter);
     66 	virtual bool visit(region_node &n, bool enter);
     67 	virtual bool visit(repeat_node &n, bool enter);
     68 	virtual bool visit(depart_node &n, bool enter);
     69 	virtual bool visit(if_node &n, bool enter);
     70 	virtual bool visit(bb_node &n, bool enter);
     71 
     72 };
     73 
     74 class rev_vpass : public vpass {
        	// Variant of vpass that differs only in its run_on() override.
        	// NOTE(review): the name suggests children are traversed in reverse
        	// order; the definition is not in this header - confirm there.
     75 
     76 public:
     77 	rev_vpass(shader &s) : vpass(s) {}
     78 
     79 	virtual void run_on(container_node &n);
     80 };
     81 
     82 
     83 // =================== PASSES
     84 
     85 class bytecode;
        // Forward declaration only: bc_dump below takes a bytecode pointer in one
        // of its constructors and does not need the full type in this header.
     86 
     87 class bc_dump : public vpass {
        	// vpass that overrides visit() for cf, alu and fetch nodes and keeps a
        	// pointer to a uint32_t buffer together with a count (ndw).
        	// NOTE(review): ndw presumably means "number of dwords" in bc_data -
        	// confirm against the implementation.
        	//
        	// The using-declaration keeps the vpass::visit overloads that are not
        	// overridden here visible instead of being hidden by the ones below.
     88 	using vpass::visit;
     89 
     90 	uint32_t *bc_data;
     91 	unsigned ndw;
     92 
     93 	unsigned id;
     94 
     95 	unsigned new_group, group_index;
     96 
     97 public:
     98 
        	// Defined out of line; bc may be omitted and then defaults to NULL.
     99 	bc_dump(shader &s, bytecode *bc = NULL);
    100 
        	// Takes the buffer pointer and count directly. id, new_group and
        	// group_index are value-initialized, i.e. start at zero. The parameter
        	// ndw shadows the member of the same name; ndw(ndw) initializes the
        	// member from the parameter.
    101 	bc_dump(shader &s, uint32_t *bc_ptr, unsigned ndw) :
    102 		vpass(s), bc_data(bc_ptr), ndw(ndw), id(), new_group(), group_index() {}
    103 
    104 	virtual int init();
    105 	virtual int done();
    106 
    107 	virtual bool visit(cf_node &n, bool enter);
    108 	virtual bool visit(alu_node &n, bool enter);
    109 	virtual bool visit(fetch_node &n, bool enter);
    110 
        	// count defaults to 2 when not given.
    111 	void dump_dw(unsigned dw_id, unsigned count = 2);
    112 
    113 	void dump(cf_node& n);
    114 	void dump(alu_node& n);
    115 	void dump(fetch_node& n);
    116 };
    117 
    118 
    119 class dce_cleanup : public vpass {
        	// vpass that overrides run() and most visit() overloads.
        	// NOTE(review): "dce" presumably stands for dead code elimination -
        	// confirm against the implementation.
        	//
        	// The using-declaration keeps the vpass::visit overloads that are not
        	// overridden here (repeat, depart, if, bb) visible.
    120 	using vpass::visit;
    121 
        	// Set once in the constructor from the shader's dce_flags.
    122 	bool remove_unused;
    123 
    124 public:
    125 
        	// remove_unused becomes true when DF_REMOVE_UNUSED is set in
        	// s.dce_flags; nodes_changed starts out false.
    126 	dce_cleanup(shader &s) : vpass(s),
    127 		remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {}
    128 
    129 	virtual int run();
    130 
    131 	virtual bool visit(node &n, bool enter);
    132 	virtual bool visit(alu_group_node &n, bool enter);
    133 	virtual bool visit(cf_node &n, bool enter);
    134 	virtual bool visit(alu_node &n, bool enter);
    135 	virtual bool visit(alu_packed_node &n, bool enter);
    136 	virtual bool visit(fetch_node &n, bool enter);
    137 	virtual bool visit(region_node &n, bool enter);
    138 	virtual bool visit(container_node &n, bool enter);
    139 
    140 private:
    141 
    142 	void cleanup_dst(node &n);
        	// The meaning of the bool result is not visible in this header.
    143 	bool cleanup_dst_vec(vvec &vv);
    144 
    145 	// Did we alter/remove nodes during a single pass?
    146 	bool nodes_changed;
    147 };
    148 
    149 
    150 class def_use : public pass {
        	// Pass with no data members of its own; all work happens in the
        	// out-of-line member functions declared below.
        	// NOTE(review): the names suggest it records value definitions and
        	// uses for nodes - confirm against the implementation.
    151 
    152 public:
    153 
    154 	def_use(shader &sh) : pass(sh) {}
    155 
    156 	virtual int run();
        	// NOTE(review): the defs flag presumably selects between processing
        	// definitions and processing uses - confirm.
    157 	void run_on(node *n, bool defs);
    158 
    159 private:
    160 
    161 	void process_uses(node *n);
    162 	void process_defs(node *n, vvec &vv, bool arr_def);
    163 	void process_phi(container_node *c, bool defs, bool uses);
    164 };
    165 
    166 
    167 
    168 class dump : public vpass {
        	// vpass that overrides every visit() overload and additionally offers
        	// a set of static dump_* helpers that can be called without an instance.
        	// NOTE(review): presumably prints a textual form of the IR; the output
        	// destination and format are not visible in this header.
    169 	using vpass::visit;
    170 
        	// Starts at 0; NOTE(review): presumably the current nesting depth used
        	// by indent() - confirm.
    171 	int level;
    172 
    173 public:
    174 
    175 	dump(shader &s) : vpass(s), level(0) {}
    176 
    177 	virtual bool visit(node &n, bool enter);
    178 	virtual bool visit(container_node &n, bool enter);
    179 	virtual bool visit(alu_group_node &n, bool enter);
    180 	virtual bool visit(cf_node &n, bool enter);
    181 	virtual bool visit(alu_node &n, bool enter);
    182 	virtual bool visit(alu_packed_node &n, bool enter);
    183 	virtual bool visit(fetch_node &n, bool enter);
    184 	virtual bool visit(region_node &n, bool enter);
    185 	virtual bool visit(repeat_node &n, bool enter);
    186 	virtual bool visit(depart_node &n, bool enter);
    187 	virtual bool visit(if_node &n, bool enter);
    188 	virtual bool visit(bb_node &n, bool enter);
    189 
    190 
        	// Static helpers: usable without constructing a dump object.
        	// Note that dump_op is overloaded (node& plus name, and node*).
    191 	static void dump_op(node &n, const char *name);
    192 	static void dump_vec(const vvec & vv);
    193 	static void dump_set(shader &sh, val_set & v);
    194 
    195 	static void dump_rels(vvec & vv);
    196 
    197 	static void dump_val(value *v);
    198 	static void dump_op(node *n);
    199 
    200 	static void dump_op_list(container_node *c);
    201 	static void dump_queue(sched_queue &q);
    202 
    203 	static void dump_alu(alu_node *n);
    204 
    205 private:
    206 
    207 	void indent();
    208 
    209 	void dump_common(node &n);
    210 	void dump_flags(node &n);
    211 
    212 	void dump_live_values(container_node &n, bool before);
    213 };
    214 
    215 
    216 // Global Code Motion
    217 
    218 class gcm : public pass {
        	// Global code motion pass (see the section comment above this class).
        	// NOTE(review): the td_/bu_ prefixes presumably stand for top-down and
        	// bottom-up (a "bottom-up pass" is mentioned further down), matching
        	// sched_early()/sched_late() - confirm against the implementation.
    219 
        	// One queue per SQ_NUM slot; cur_sq below presumably indexes these.
    220 	sched_queue bu_ready[SQ_NUM];
    221 	sched_queue bu_ready_next[SQ_NUM];
    222 	sched_queue bu_ready_early[SQ_NUM];
    223 	sched_queue ready;
    224 	sched_queue ready_above;
    225 
    226 	unsigned outstanding_lds_oq;
    227 	container_node pending;
    228 
        	// Per-node record of two basic blocks; both pointers start out null.
    229 	struct op_info {
    230 		bb_node* top_bb;
    231 		bb_node* bottom_bb;
    232 		op_info() : top_bb(), bottom_bb() {}
    233 	};
    234 
    235 	typedef std::map<node*, op_info> op_info_map;
    236 
        	// Maps a node to an unsigned counter (see init_def_count() and
        	// init_use_count() below).
    237 	typedef std::map<node*, unsigned> nuc_map;
    238 
    239 	op_info_map op_map;
    240 	nuc_map uses;
    241 
    242 	typedef std::vector<nuc_map> nuc_stack;
    243 
        	// Stack of counter maps with its current level; the constructor creates
        	// it with one element so that level 0 always exists.
    244 	nuc_stack nuc_stk;
    245 	unsigned ucs_level;
    246 
    247 	bb_node * bu_bb;
    248 
    249 	vvec pending_defs;
    250 
    251 	node_list pending_nodes;
    252 
    253 	unsigned cur_sq;
    254 
    255 	// for register pressure tracking in bottom-up pass
    256 	val_set live;
    257 	int live_count;
    258 
    259 	static const int rp_threshold = 100;
    260 
    261 	bool pending_exec_mask_update;
    262 
    263 public:
    264 
        	// Value-initializes every scalar and pointer member (zero / null /
        	// false) and sizes nuc_stk to one element. ready_above and pending are
        	// not named in the list and are therefore default-constructed.
    265 	gcm(shader &sh) : pass(sh),
    266 		bu_ready(), bu_ready_next(), bu_ready_early(),
    267 		ready(), outstanding_lds_oq(),
    268 		op_map(), uses(), nuc_stk(1), ucs_level(),
    269 		bu_bb(), pending_defs(), pending_nodes(), cur_sq(),
    270 		live(), live_count(), pending_exec_mask_update() {}
    271 
    272 	virtual int run();
    273 
    274 private:
    275 
    276 	void collect_instructions(container_node *c, bool early_pass);
    277 
    278 	void sched_early(container_node *n);
    279 	void td_sched_bb(bb_node *bb);
    280 	bool td_is_ready(node *n);
    281 	void td_release_uses(vvec &v);
    282 	void td_release_val(value *v);
    283 	void td_schedule(bb_node *bb, node *n);
    284 
    285 	void sched_late(container_node *n);
    286 	void bu_sched_bb(bb_node *bb);
    287 	void bu_release_defs(vvec &v, bool src);
    288 	void bu_release_phi_defs(container_node *p, unsigned op);
    289 	bool bu_is_ready(node *n);
    290 	void bu_release_val(value *v);
    291 	void bu_release_op(node * n);
    292 	void bu_find_best_bb(node *n, op_info &oi);
    293 	void bu_schedule(container_node *bb, node *n);
    294 
    295 	void push_uc_stack();
    296 	void pop_uc_stack();
    297 
    298 	void init_def_count(nuc_map &m, container_node &s);
    299 	void init_use_count(nuc_map &m, container_node &s);
    300 	unsigned get_uc_vec(vvec &vv);
    301 	unsigned get_dc_vec(vvec &vv, bool src);
    302 
    303 	void add_ready(node *n);
    304 
    305 	void dump_uc_stack();
    306 
    307 	unsigned real_alu_count(sched_queue &q, unsigned max);
    308 
    309 	// check whether at least `threshold` alu instructions are ready
    310 	bool check_alu_ready_count(unsigned threshold);
    311 };
    312 
    313 
    314 class gvn : public vpass {
        	// vpass that overrides visit() for node, cf, alu, alu_packed, fetch and
        	// region nodes; it has no data members of its own.
        	// NOTE(review): "gvn" presumably stands for global value numbering -
        	// confirm against the implementation.
        	//
        	// The using-declaration keeps the remaining vpass::visit overloads
        	// visible.
    315 	using vpass::visit;
    316 
    317 public:
    318 
    319 	gvn(shader &sh) : vpass(sh) {}
    320 
    321 	virtual bool visit(node &n, bool enter);
    322 	virtual bool visit(cf_node &n, bool enter);
    323 	virtual bool visit(alu_node &n, bool enter);
    324 	virtual bool visit(alu_packed_node &n, bool enter);
    325 	virtual bool visit(fetch_node &n, bool enter);
    326 	virtual bool visit(region_node &n, bool enter);
    327 
    328 private:
    329 
        	// rewrite defaults to true when omitted.
    330 	void process_op(node &n, bool rewrite = true);
    331 
    332 	// returns true if the value was rewritten
        	// v is a reference to a pointer, so the callee can replace the pointer.
    333 	bool process_src(value* &v, bool rewrite);
    334 
    335 
        	// v is likewise passed as a reference to a pointer.
    336 	void process_alu_src_constants(node &n, value* &v);
    337 };
    338 
    339 
    340 class if_conversion : public pass {
        	// Pass with no data members of its own; every member function below is
        	// public and defined out of line.
        	// NOTE(review): judging by the names it rewrites regions containing
        	// conditionals (e.g. convert_phi() returns an alu_node built from a
        	// select value and a phi node) - confirm against the implementation.
    341 
    342 public:
    343 
    344 	if_conversion(shader &sh) : pass(sh) {}
    345 
    346 	virtual int run();
    347 
        	// The meaning of the bool result is not visible in this header.
    348 	bool run_on(region_node *r);
    349 
    350 	void convert_kill_instructions(region_node *r, value *em, bool branch,
    351 	                               container_node *c);
    352 
    353 	bool check_and_convert(region_node *r);
    354 
    355 	alu_node* convert_phi(value *select, node *phi);
    356 
    357 };
    358 
    359 
    360 class liveness : public rev_vpass {
        	// rev_vpass that overrides init() and every visit() overload except the
        	// one for bb_node's siblings listed in vpass but not here (all twelve
        	// node types are in fact covered below). It keeps a val_set named live
        	// and a live_changed flag.
        	// NOTE(review): presumably computes which values are live at each
        	// point; confirm against the implementation.
    361 	using vpass::visit;
    362 
    363 	val_set live;
        	// Starts out false (see the constructor).
    364 	bool live_changed;
    365 
    366 public:
    367 
    368 	liveness(shader &s) : rev_vpass(s), live_changed(false) {}
    369 
    370 	virtual int init();
    371 
    372 	virtual bool visit(node &n, bool enter);
    373 	virtual bool visit(bb_node &n, bool enter);
    374 	virtual bool visit(container_node &n, bool enter);
    375 	virtual bool visit(alu_group_node &n, bool enter);
    376 	virtual bool visit(cf_node &n, bool enter);
    377 	virtual bool visit(alu_node &n, bool enter);
    378 	virtual bool visit(alu_packed_node &n, bool enter);
    379 	virtual bool visit(fetch_node &n, bool enter);
    380 	virtual bool visit(region_node &n, bool enter);
    381 	virtual bool visit(repeat_node &n, bool enter);
    382 	virtual bool visit(depart_node &n, bool enter);
    383 	virtual bool visit(if_node &n, bool enter);
    384 
    385 private:
    386 
    387 	void update_interferences();
    388 	void process_op(node &n);
    389 
        	// The meaning of the bool results below is not visible in this header.
    390 	bool remove_val(value *v);
    391 	bool remove_vec(vvec &v);
    392 	bool process_outs(node& n);
    393 	void process_ins(node& n);
    394 
    395 	void process_phi_outs(container_node *phi);
    396 	void process_phi_branch(container_node *phi, unsigned id);
    397 
    398 	bool process_maydef(value *v);
    399 
    400 	bool add_vec(vvec &vv, bool src);
    401 
    402 	void update_src_vec(vvec &vv, bool src);
    403 };
    404 
    405 
    406 struct bool_op_info {
        	// Plain aggregate filled in through the reference parameter of
        	// peephole::get_bool_op_info(). It has no constructor, so the fields
        	// are indeterminate until assigned unless the object is
        	// value-initialized.
        	// NOTE(review): the exact meaning of invert and int_cvt is not visible
        	// in this header.
    407 	bool invert;
    408 	unsigned int_cvt;
    409 
    410 	alu_node *n;
    411 };
    412 
    413 class peephole : public pass {
        	// Pass with no data members of its own; every member function below is
        	// public and defined out of line.
        	// NOTE(review): the optimize_* names suggest local rewrites of single
        	// alu instructions - confirm against the implementation.
    414 
    415 public:
    416 
    417 	peephole(shader &sh) : pass(sh) {}
    418 
    419 	virtual int run();
    420 
    421 	void run_on(container_node *c);
    422 
    423 	void optimize_cc_op(alu_node *a);
    424 
    425 	void optimize_cc_op2(alu_node *a);
    426 	void optimize_CNDcc_op(alu_node *a);
    427 
        	// bop is an output parameter; the meaning of the bool result is not
        	// visible in this header.
    428 	bool get_bool_op_info(value *b, bool_op_info& bop);
        	// a is a reference to a pointer, so the callee can replace the pointer.
    429 	bool get_bool_flt_to_int_source(alu_node* &a);
    430 	void convert_float_setcc(alu_node *f2i, alu_node *s);
    431 };
    432 
    433 
    434 class psi_ops : public rev_vpass {
        	// rev_vpass that overrides visit() only for node and alu_node and has
        	// no data members of its own.
        	// NOTE(review): what a "psi" operation is cannot be determined from
        	// this header - see the implementation.
        	//
        	// The using-declaration keeps the inherited visit overloads that are
        	// not overridden here visible.
    435 	using rev_vpass::visit;
    436 
    437 public:
    438 
    439 	psi_ops(shader &s) : rev_vpass(s) {}
    440 
    441 	virtual bool visit(node &n, bool enter);
    442 	virtual bool visit(alu_node &n, bool enter);
    443 
        	// The meaning of the bool results below is not visible in this header.
    444 	bool try_inline(node &n);
    445 	bool try_reduce(node &n);
    446 	bool eliminate(node &n);
    447 
    448 	void unpredicate(node *n);
    449 };
    450 
    451 
    452 // check correctness of the generated code, e.g.:
    453 // - expected source operand value is the last value written to its gpr,
    454 // - all arguments of phi node should be allocated to the same gpr,
    455 // TODO other tests
    456 class ra_checker : public pass {
        	// Checker pass; the comment preceding this class lists the properties
        	// it verifies. State is a stack of register-to-value maps plus an
        	// array of five value pointers.
    457 
        	// Maps a register/channel key (sel_chan) to the value recorded for it.
    458 	typedef std::map<sel_chan, value *> reg_value_map;
    459 
    460 	typedef std::vector<reg_value_map> regmap_stack;
    461 
        	// rm_stk_level is the index of the map currently returned by rmap().
    462 	regmap_stack rm_stack;
    463 	unsigned rm_stk_level;
    464 
    465 	value* prev_dst[5];
    466 
    467 public:
    468 
        	// rm_stack is default-constructed (empty), rm_stk_level starts at 0 and
        	// prev_dst() value-initializes the five pointers to null.
    469 	ra_checker(shader &sh) : pass(sh), rm_stk_level(0), prev_dst() {}
    470 
    471 	virtual int run();
    472 
    473 	void run_on(container_node *c);
    474 
    475 	void dump_error(const error_info &e);
    476 	void dump_all_errors();
    477 
    478 private:
    479 
        	// Unchecked indexing into rm_stack.
        	// NOTE(review): rm_stack is empty right after construction, so this
        	// must not be called before something sizes it - presumably run() or
        	// push_stack() does; confirm in the implementation.
    480 	reg_value_map& rmap() { return rm_stack[rm_stk_level]; }
    481 
    482 	void push_stack();
    483 	void pop_stack();
    484 
    485 	// when going out of the alu clause, values in the clause temporary gprs,
    486 	// AR, predicate values, PS/PV are destroyed
    487 	void kill_alu_only_regs();
        	// msg is taken by value.
    488 	void error(node *n, unsigned id, std::string msg);
    489 
    490 	void check_phi_src(container_node *p, unsigned id);
    491 	void process_phi_dst(container_node *p);
    492 	void check_alu_group(alu_group_node *g);
    493 	void process_op_dst(node *n);
    494 	void check_op_src(node *n);
    495 	void check_src_vec(node *n, unsigned id, vvec &vv, bool src);
    496 	void check_value_gpr(node *n, unsigned id, value *v);
    497 };
    498 
    499 // =======================================
    500 
    501 
    502 class ra_coalesce : public pass {
        	// Pass whose only member besides the constructor is the run()
        	// override, defined out of line.
        	// NOTE(review): the name suggests the coalescing step of register
        	// allocation - confirm against the implementation.
    503 
    504 public:
    505 
    506 	ra_coalesce(shader &sh) : pass(sh) {}
    507 
    508 	virtual int run();
    509 };
    510 
    511 
    512 
    513 // =======================================
    514 
    515 class ra_init : public pass {
    516 
    517 public:
    518 
        	// ra_tune controls how register channels are distributed and is fixed
        	// at construction time:
        	//
        	//  * 3 on cayman (VLIW-4): a new allocation picks a channel that
        	//    differs from the 3 most recently used ones. Spreading channels
        	//    uniformly gives significantly better alu slot utilization at the
        	//    cost of higher gpr usage. This is hoped to improve performance,
        	//    but that still has to be proven with real benchmarks.
        	//
        	//  * 0 otherwise (VLIW-5): effectively turns the feature off. It could
        	//    slightly improve slot utilization there as well, but the extra
        	//    register pressure seems to matter more and some benchmarks show
        	//    a negative overall effect. VLIW-5 does not really need it anyway,
        	//    because the trans slot (not restricted by register write channel)
        	//    can absorb most deviations from a uniform channel distribution.
    519 	ra_init(shader &sh)
        		: pass(sh),
        		  prev_chans(),
        		  ra_tune(sh.get_ctx().is_cayman() ? 3 : 0) {}
    538 
    539 	virtual int run();
    540 
    541 private:
    542 
        	// Declaration order matters: prev_chans is initialized before ra_tune.
    543 	unsigned prev_chans;
    544 	unsigned ra_tune;
    545 
    546 	void add_prev_chan(unsigned chan);
    547 	unsigned get_preferable_chan_mask();
    548 
    549 	void ra_node(container_node *c);
    550 	void process_op(node *n);
    551 
    552 	void color(value *v);
    553 
    554 	void color_bs_constraint(ra_constraint *c);
    555 
    556 	void assign_color(value *v, sel_chan c);
    557 	void alloc_arrays();
    558 };
    559 
    560 // =======================================
    561 
    562 class ra_split : public pass {
        	// Pass with no data members of its own; every member function below is
        	// public and defined out of line.
        	// NOTE(review): the name suggests a splitting step of register
        	// allocation - confirm against the implementation.
    563 
    564 public:
    565 
    566 	ra_split(shader &sh) : pass(sh) {}
    567 
    568 	virtual int run();
    569 
    570 	void split(container_node *n);
    571 	void split_op(node *n);
    572 	void split_alu_packed(alu_packed_node *n);
    573 	void split_vector_inst(node *n);
    574 
    575 	void split_packed_ins(alu_packed_node *n);
    576 
        // Compiled out: split_pinned_outs() is not part of the class as built.
    577 #if 0
    578 	void split_pinned_outs(node *n);
    579 #endif
    580 
        	// v1 and v2 are passed by non-const reference.
        	// NOTE(review): presumably they receive the two resulting vectors, and
        	// allow_swz presumably means "allow swizzle" - confirm.
    581 	void split_vec(vvec &vv, vvec &v1, vvec &v2, bool allow_swz);
    582 
    583 	void split_phi_src(container_node *loc, container_node *c, unsigned id,
    584 	                   bool loop);
    585 	void split_phi_dst(node *loc, container_node *c, bool loop);
    586 	void init_phi_constraints(container_node *c);
    587 };
    588 
    589 
    590 
    591 class ssa_prepare : public vpass {
        	// vpass that overrides visit() for cf, alu, fetch, region, repeat and
        	// depart nodes and maintains a stack of value sets, one per nesting
        	// level. push_stk() opens a new level, pop_stk() merges the closed
        	// level into its parent, and cur_set() returns the set of the current
        	// level.
        	//
        	// The using-declaration keeps the vpass::visit overloads that are not
        	// overridden here visible.
    592 	using vpass::visit;
    593 
    594 	typedef std::vector<val_set> vd_stk;
        	// Indexed by level. Always holds at least level + 1 elements.
    595 	vd_stk stk;
    596 
    597 	unsigned level;
    598 
    599 public:
        	// stk is created with one element so that cur_set() is valid at level 0
        	// even before the first push_stk(). A default-constructed (empty)
        	// vector would make stk[0] an out-of-range access. This mirrors how
        	// gcm seeds its own level-indexed stack with nuc_stk(1).
    600 	ssa_prepare(shader &s) : vpass(s), stk(1), level(0) {}
    601 
    602 	virtual bool visit(cf_node &n, bool enter);
    603 	virtual bool visit(alu_node &n, bool enter);
    604 	virtual bool visit(fetch_node &n, bool enter);
    605 	virtual bool visit(region_node &n, bool enter);
    606 	virtual bool visit(repeat_node &n, bool enter);
    607 	virtual bool visit(depart_node &n, bool enter);
    608 
    609 private:
    610 
        	// Opens a new nesting level with an empty set: grows the vector when
        	// the slot does not exist yet, otherwise clears the slot left over
        	// from an earlier push at the same depth.
    611 	void push_stk() {
    612 		++level;
    613 		if (level + 1 > stk.size())
    614 			stk.resize(level+1);
    615 		else
    616 			stk[level].clear();
    617 	}
        	// Closes the current level and merges its set into the parent level.
        	// Must not be called at level 0 (asserted). The closed slot stays
        	// allocated so that a later push_stk() can reuse it.
    618 	void pop_stk() {
    619 		assert(level);
    620 		--level;
    621 		stk[level].add_set(stk[level + 1]);
    622 	}
    623 
    624 	void add_defs(node &n);
    625 
        	// Set belonging to the current nesting level (unchecked indexing).
    626 	val_set & cur_set() { return stk[level]; }
    627 
    628 	container_node* create_phi_nodes(int count);
    629 };
    630 
    631 class ssa_rename : public vpass {
        	// vpass that overrides init() and all visit() overloads except the one
        	// for bb_node. State consists of three value-to-index maps, three
        	// stacks of such maps, and a map from a 32-bit key to a value pointer.
        	// NOTE(review): presumably the renaming step of SSA construction that
        	// follows ssa_prepare - confirm against the implementation.
        	//
        	// The using-declaration keeps the bb_node overload of vpass::visit
        	// visible.
    632 	using vpass::visit;
    633 
        	// Maps a value to an unsigned index (see get_index(), set_index() and
        	// new_index() below).
    634 	typedef sb_map<value*, unsigned> def_map;
    635 
        	// NOTE(review): each *_count map appears to pair with the rename stack
        	// of the same kind below (plain, lds_oq, lds_rw) - confirm.
    636 	def_map def_count;
    637 	def_map lds_oq_count;
    638 	def_map lds_rw_count;
    639 	std::stack<def_map> rename_stack;
    640 	std::stack<def_map> rename_lds_oq_stack;
    641 	std::stack<def_map> rename_lds_rw_stack;
    642 
    643 	typedef std::map<uint32_t, value*> val_map;
    644 	val_map values;
    645 
    646 public:
    647 
        	// All containers are default-constructed, i.e. start out empty.
    648 	ssa_rename(shader &s) : vpass(s) {}
    649 
    650 	virtual int init();
    651 
    652 	virtual bool visit(container_node &n, bool enter);
    653 	virtual bool visit(node &n, bool enter);
    654 	virtual bool visit(alu_group_node &n, bool enter);
    655 	virtual bool visit(cf_node &n, bool enter);
    656 	virtual bool visit(alu_node &n, bool enter);
    657 	virtual bool visit(alu_packed_node &n, bool enter);
    658 	virtual bool visit(fetch_node &n, bool enter);
    659 	virtual bool visit(region_node &n, bool enter);
    660 	virtual bool visit(repeat_node &n, bool enter);
    661 	virtual bool visit(depart_node &n, bool enter);
    662 	virtual bool visit(if_node &n, bool enter);
    663 
    664 private:
    665 
    666 	void push(node *phi);
    667 	void pop();
    668 
        	// Index helpers operating on whichever def_map the caller passes in.
    669 	unsigned get_index(def_map& m, value* v);
    670 	void set_index(def_map& m, value* v, unsigned index);
    671 	unsigned new_index(def_map& m, value* v);
    672 
        	// Both return a value pointer.
        	// NOTE(review): presumably the renamed version of v - confirm.
    673 	value* rename_use(node *n, value* v);
    674 	value* rename_def(node *def, value* v);
    675 
    676 	void rename_src_vec(node *n, vvec &vv, bool src);
    677 	void rename_dst_vec(node *def, vvec &vv, bool set_def);
    678 
    679 	void rename_src(node *n);
    680 	void rename_dst(node *n);
    681 
    682 	void rename_phi_args(container_node *phi, unsigned op, bool def);
    683 
    684 	void rename_virt(node *n);
    685 	void rename_virt_val(node *n, value *v);
    686 };
    687 
    688 class bc_finalizer : public pass {
        	// Pass that keeps one cf_node pointer per export type, a pointer to a
        	// "last" cf_node, and two counters (ngpr, nstack) updated through
        	// update_ngpr() and update_nstack().
        	// NOTE(review): presumably the last step before bytecode is emitted;
        	// confirm against the implementation.
    689 
    690 	cf_node *last_export[EXP_TYPE_COUNT];
    691 	cf_node *last_cf;
    692 
    693 	unsigned ngpr;
    694 	unsigned nstack;
    695 
    696 public:
    697 
        	// Value-initializes every member: all last_export entries and last_cf
        	// are null, ngpr and nstack are 0.
    698 	bc_finalizer(shader &sh) : pass(sh), last_export(), last_cf(), ngpr(),
    699 		nstack() {}
    700 
    701 	virtual int run();
    702 
    703 	void finalize_loop(region_node *r);
    704 	void finalize_if(region_node *r);
    705 
    706 	void run_on(container_node *c);
    707 
    708 	void insert_rv6xx_load_ar_workaround(alu_group_node *b4);
    709 	void finalize_alu_group(alu_group_node *g, node *prev_node);
        	// The meaning of the bool result is not visible in this header.
    710 	bool finalize_alu_src(alu_group_node *g, alu_node *a, alu_group_node *prev_node);
    711 
    712 	void emit_set_grad(fetch_node* f);
    713 	void finalize_fetch(fetch_node *f);
    714 
    715 	void finalize_cf(cf_node *c);
    716 
    717 	sel_chan translate_kcache(cf_node *alu, value *v);
    718 
    719 	void update_ngpr(unsigned gpr);
        	// add defaults to 0 when omitted.
    720 	void update_nstack(region_node *r, unsigned add = 0);
    721 
        	// loops and ifs are output parameters passed by reference; add defaults
        	// to 0 when omitted.
    722 	unsigned get_stack_depth(node *n, unsigned &loops, unsigned &ifs,
    723 	                         unsigned add = 0);
    724 
    725 	void cf_peephole();
    726 
    727 private:
    728 	void copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start);
    729 	void emit_set_texture_offsets(fetch_node &f);
    730 };
    731 
    732 
    733 } // namespace r600_sb
    734 
    735 #endif /* SB_PASS_H_ */
    736