Home | History | Annotate | Download | only in sb
      1 /*
      2  * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *      Vadim Girlin
     25  */
     26 
     27 #ifndef SB_BC_H_
     28 #define SB_BC_H_
     29 
#include <stdint.h>
#include "r600_isa.h"

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <stack>
#include <string>
#include <vector>
     37 
     38 struct r600_bytecode;
     39 struct r600_shader;
     40 
     41 namespace r600_sb {
     42 
     43 class hw_encoding_format;
     44 class node;
     45 class alu_node;
     46 class cf_node;
     47 class fetch_node;
     48 class alu_group_node;
     49 class region_node;
     50 class shader;
     51 class value;
     52 
     53 class sb_ostream {
     54 public:
     55 	sb_ostream() {}
     56 
     57 	virtual void write(const char *s) = 0;
     58 
     59 	sb_ostream& operator <<(const char *s) {
     60 		write(s);
     61 		return *this;
     62 	}
     63 
     64 	sb_ostream& operator <<(const std::string& s) {
     65 		return *this << s.c_str();
     66 	}
     67 
     68 	sb_ostream& operator <<(void *p) {
     69 		char b[32];
     70 		sprintf(b, "%p", p);
     71 		return *this << b;
     72 	}
     73 
     74 	sb_ostream& operator <<(char c) {
     75 		char b[2];
     76 		sprintf(b, "%c", c);
     77 		return *this << b;
     78 	}
     79 
     80 	sb_ostream& operator <<(int n) {
     81 		char b[32];
     82 		sprintf(b, "%d", n);
     83 		return *this << b;
     84 	}
     85 
     86 	sb_ostream& operator <<(unsigned n) {
     87 		char b[32];
     88 		sprintf(b, "%u", n);
     89 		return *this << b;
     90 	}
     91 
     92 	sb_ostream& operator <<(double d) {
     93 		char b[32];
     94 		snprintf(b, 32, "%g", d);
     95 		return *this << b;
     96 	}
     97 
     98 	// print as field of specified width, right aligned
     99 	void print_w(int n, int width) {
    100 		char b[256],f[8];
    101 		sprintf(f, "%%%dd", width);
    102 		snprintf(b, 256, f, n);
    103 		write(b);
    104 	}
    105 
    106 	// print as field of specified width, left aligned
    107 	void print_wl(int n, int width) {
    108 		char b[256],f[8];
    109 		sprintf(f, "%%-%dd", width);
    110 		snprintf(b, 256, f, n);
    111 		write(b);
    112 	}
    113 
    114 	// print as field of specified width, left aligned
    115 	void print_wl(const std::string &s, int width) {
    116 		write(s.c_str());
    117 		int l = s.length();
    118 		while (l++ < width) {
    119 			write(" ");
    120 		}
    121 	}
    122 
    123 	// print int as field of specified width, right aligned, zero-padded
    124 	void print_zw(int n, int width) {
    125 		char b[256],f[8];
    126 		sprintf(f, "%%0%dd", width);
    127 		snprintf(b, 256, f, n);
    128 		write(b);
    129 	}
    130 
    131 	// print int as field of specified width, right aligned, zero-padded, hex
    132 	void print_zw_hex(int n, int width) {
    133 		char b[256],f[8];
    134 		sprintf(f, "%%0%dx", width);
    135 		snprintf(b, 256, f, n);
    136 		write(b);
    137 	}
    138 };
    139 
    140 class sb_ostringstream : public sb_ostream {
    141 	std::string data;
    142 public:
    143 	sb_ostringstream() : data() {}
    144 
    145 	virtual void write(const char *s) {
    146 		data += s;
    147 	}
    148 
    149 	void clear() { data.clear(); }
    150 
    151 	const char* c_str() { return data.c_str(); }
    152 	std::string& str() { return data; }
    153 };
    154 
    155 class sb_log : public sb_ostream {
    156 	FILE *o;
    157 public:
    158 	sb_log() : o(stderr) {}
    159 
    160 	virtual void write(const char *s) {
    161 		fputs(s, o);
    162 	}
    163 };
    164 
    165 extern sb_log sblog;
    166 
    167 enum shader_target
    168 {
    169 	TARGET_UNKNOWN,
    170 	TARGET_VS,
    171 	TARGET_ES,
    172 	TARGET_PS,
    173 	TARGET_GS,
    174 	TARGET_GS_COPY,
    175 	TARGET_COMPUTE,
    176 	TARGET_FETCH,
    177 	TARGET_HS,
    178 	TARGET_LS,
    179 
    180 	TARGET_NUM
    181 };
    182 
    183 enum sb_hw_class_bits
    184 {
    185 	HB_R6	= (1<<0),
    186 	HB_R7	= (1<<1),
    187 	HB_EG	= (1<<2),
    188 	HB_CM	= (1<<3),
    189 
    190 	HB_R6R7 = (HB_R6 | HB_R7),
    191 	HB_EGCM = (HB_EG | HB_CM),
    192 	HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG),
    193 	HB_R7EGCM = (HB_R7 | HB_EG | HB_CM),
    194 
    195 	HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM)
    196 };
    197 
    198 enum sb_hw_chip
    199 {
    200 	HW_CHIP_UNKNOWN,
    201 	HW_CHIP_R600,
    202 	HW_CHIP_RV610,
    203 	HW_CHIP_RV630,
    204 	HW_CHIP_RV670,
    205 	HW_CHIP_RV620,
    206 	HW_CHIP_RV635,
    207 	HW_CHIP_RS780,
    208 	HW_CHIP_RS880,
    209 	HW_CHIP_RV770,
    210 	HW_CHIP_RV730,
    211 	HW_CHIP_RV710,
    212 	HW_CHIP_RV740,
    213 	HW_CHIP_CEDAR,
    214 	HW_CHIP_REDWOOD,
    215 	HW_CHIP_JUNIPER,
    216 	HW_CHIP_CYPRESS,
    217 	HW_CHIP_HEMLOCK,
    218 	HW_CHIP_PALM,
    219 	HW_CHIP_SUMO,
    220 	HW_CHIP_SUMO2,
    221 	HW_CHIP_BARTS,
    222 	HW_CHIP_TURKS,
    223 	HW_CHIP_CAICOS,
    224 	HW_CHIP_CAYMAN,
    225 	HW_CHIP_ARUBA
    226 };
    227 
    228 enum sb_hw_class
    229 {
    230 	HW_CLASS_UNKNOWN,
    231 	HW_CLASS_R600,
    232 	HW_CLASS_R700,
    233 	HW_CLASS_EVERGREEN,
    234 	HW_CLASS_CAYMAN
    235 };
    236 
    237 enum alu_slots {
    238 	SLOT_X = 0,
    239 	SLOT_Y = 1,
    240 	SLOT_Z = 2,
    241 	SLOT_W = 3,
    242 	SLOT_TRANS = 4
    243 };
    244 
    245 enum misc_consts {
    246 	MAX_ALU_LITERALS = 4,
    247 	MAX_ALU_SLOTS = 128,
    248 	MAX_GPR = 128,
    249 	MAX_CHAN = 4
    250 
    251 };
    252 
    253 enum alu_src_sel {
    254 
    255 	ALU_SRC_LDS_OQ_A = 219,
    256 	ALU_SRC_LDS_OQ_B = 220,
    257 	ALU_SRC_LDS_OQ_A_POP = 221,
    258 	ALU_SRC_LDS_OQ_B_POP = 222,
    259 	ALU_SRC_LDS_DIRECT_A = 223,
    260 	ALU_SRC_LDS_DIRECT_B = 224,
    261 	ALU_SRC_TIME_HI = 227,
    262 	ALU_SRC_TIME_LO = 228,
    263 	ALU_SRC_MASK_HI = 229,
    264 	ALU_SRC_MASK_LO = 230,
    265 	ALU_SRC_HW_WAVE_ID = 231,
    266 	ALU_SRC_SIMD_ID = 232,
    267 	ALU_SRC_SE_ID = 233,
    268 	ALU_SRC_HW_THREADGRP_ID = 234,
    269 	ALU_SRC_WAVE_ID_IN_GRP = 235,
    270 	ALU_SRC_NUM_THREADGRP_WAVES = 236,
    271 	ALU_SRC_HW_ALU_ODD = 237,
    272 	ALU_SRC_LOOP_IDX = 238,
    273 	ALU_SRC_PARAM_BASE_ADDR = 240,
    274 	ALU_SRC_NEW_PRIM_MASK = 241,
    275 	ALU_SRC_PRIM_MASK_HI = 242,
    276 	ALU_SRC_PRIM_MASK_LO = 243,
    277 	ALU_SRC_1_DBL_L = 244,
    278 	ALU_SRC_1_DBL_M = 245,
    279 	ALU_SRC_0_5_DBL_L = 246,
    280 	ALU_SRC_0_5_DBL_M = 247,
    281 	ALU_SRC_0 = 248,
    282 	ALU_SRC_1 = 249,
    283 	ALU_SRC_1_INT = 250,
    284 	ALU_SRC_M_1_INT = 251,
    285 	ALU_SRC_0_5 = 252,
    286 	ALU_SRC_LITERAL = 253,
    287 	ALU_SRC_PV = 254,
    288 	ALU_SRC_PS = 255,
    289 
    290 	ALU_SRC_PARAM_OFFSET = 448
    291 };
    292 
    293 enum alu_predicate_select
    294 {
    295 	PRED_SEL_OFF	= 0,
    296 //	RESERVED		= 1,
    297 	PRED_SEL_0		= 2,
    298 	PRED_SEL_1		= 3
    299 };
    300 
    301 
    302 enum alu_omod {
    303 	OMOD_OFF  = 0,
    304 	OMOD_M2   = 1,
    305 	OMOD_M4   = 2,
    306 	OMOD_D2   = 3
    307 };
    308 
    309 enum alu_index_mode {
    310 	INDEX_AR_X        = 0,
    311 	INDEX_AR_Y_R600   = 1,
    312 	INDEX_AR_Z_R600   = 2,
    313 	INDEX_AR_W_R600   = 3,
    314 
    315 	INDEX_LOOP        = 4,
    316 	INDEX_GLOBAL      = 5,
    317 	INDEX_GLOBAL_AR_X = 6
    318 };
    319 
    320 enum alu_cayman_mova_dst {
    321 	CM_MOVADST_AR_X,
    322 	CM_MOVADST_PC,
    323 	CM_MOVADST_IDX0,
    324 	CM_MOVADST_IDX1,
    325 	CM_MOVADST_CG0,		// clause-global byte 0
    326 	CM_MOVADST_CG1,
    327 	CM_MOVADST_CG2,
    328 	CM_MOVADST_CG3
    329 };
    330 
    331 enum alu_cayman_exec_mask_op {
    332 	CM_EMO_DEACTIVATE,
    333 	CM_EMO_BREAK,
    334 	CM_EMO_CONTINUE,
    335 	CM_EMO_KILL
    336 };
    337 
    338 
    339 enum cf_exp_type {
    340 	EXP_PIXEL,
    341 	EXP_POS,
    342 	EXP_PARAM,
    343 
    344 	EXP_TYPE_COUNT
    345 };
    346 
    347 enum cf_mem_type {
    348 	MEM_WRITE,
    349 	MEM_WRITE_IND,
    350 	MEM_WRITE_ACK,
    351 	MEM_WRITE_IND_ACK
    352 };
    353 
    354 
    355 enum alu_kcache_mode {
    356 	KC_LOCK_NONE,
    357 	KC_LOCK_1,
    358 	KC_LOCK_2,
    359 	KC_LOCK_LOOP
    360 };
    361 
    362 enum alu_kcache_index_mode {
    363 	KC_INDEX_NONE,
    364 	KC_INDEX_0,
    365 	KC_INDEX_1,
    366 	KC_INDEX_INVALID
    367 };
    368 
    369 enum chan_select {
    370 	SEL_X	= 0,
    371 	SEL_Y	= 1,
    372 	SEL_Z	= 2,
    373 	SEL_W	= 3,
    374 	SEL_0	= 4,
    375 	SEL_1	= 5,
    376 //	RESERVED = 6,
    377 	SEL_MASK = 7
    378 };
    379 
    380 enum bank_swizzle {
    381 	VEC_012 = 0,
    382 	VEC_021 = 1,
    383 	VEC_120 = 2,
    384 	VEC_102 = 3,
    385 	VEC_201 = 4,
    386 	VEC_210 = 5,
    387 
    388 	VEC_NUM = 6,
    389 
    390 	SCL_210 = 0,
    391 	SCL_122 = 1,
    392 	SCL_212 = 2,
    393 	SCL_221 = 3,
    394 
    395 	SCL_NUM = 4
    396 
    397 };
    398 
    399 enum sched_queue_id {
    400 	SQ_CF,
    401 	SQ_ALU,
    402 	SQ_TEX,
    403 	SQ_VTX,
    404 
    405 	SQ_NUM
    406 };
    407 
    408 struct literal {
    409 	union {
    410 		int32_t i;
    411 		uint32_t u;
    412 		float f;
    413 	};
    414 
    415 	literal(int32_t i = 0) : i(i) {}
    416 	literal(uint32_t u) : u(u) {}
    417 	literal(float f) : f(f) {}
    418 	literal(double f) : f(f) {}
    419 	operator uint32_t() const { return u; }
    420 	bool operator ==(literal l) { return u == l.u; }
    421 	bool operator ==(int v_int) { return i == v_int; }
    422 	bool operator ==(unsigned v_uns) { return u == v_uns; }
    423 };
    424 
    425 struct bc_kcache {
    426 	unsigned mode;
    427 	unsigned bank;
    428 	unsigned addr;
    429 	unsigned index_mode;
    430 } ;
    431 
    432 // TODO optimize bc structures
    433 
    434 struct bc_cf {
    435 
    436 	bc_kcache kc[4];
    437 
    438 	unsigned id;
    439 
    440 
    441 	const cf_op_info * op_ptr;
    442 	unsigned op;
    443 
    444 	unsigned addr:32;
    445 
    446 	unsigned alt_const:1;
    447 	unsigned uses_waterfall:1;
    448 
    449 	unsigned barrier:1;
    450 	unsigned count:7;
    451 	unsigned pop_count:3;
    452 	unsigned call_count:6;
    453 	unsigned whole_quad_mode:1;
    454 	unsigned valid_pixel_mode:1;
    455 
    456 	unsigned jumptable_sel:3;
    457 	unsigned cf_const:5;
    458 	unsigned cond:2;
    459 	unsigned end_of_program:1;
    460 
    461 	unsigned array_base:13;
    462 	unsigned elem_size:2;
    463 	unsigned index_gpr:7;
    464 	unsigned rw_gpr:7;
    465 	unsigned rw_rel:1;
    466 	unsigned type:2;
    467 
    468 	unsigned burst_count:4;
    469 	unsigned mark:1;
    470 	unsigned sel[4];
    471 
    472 	unsigned array_size:12;
    473 	unsigned comp_mask:4;
    474 
    475 	unsigned rat_id:4;
    476 	unsigned rat_inst:6;
    477 	unsigned rat_index_mode:2;
    478 
    479 	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
    480 
    481 	bool is_alu_extended() {
    482 		assert(op_ptr->flags & CF_ALU);
    483 		return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
    484 			kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
    485 			kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
    486 	}
    487 
    488 };
    489 
    490 struct bc_alu_src {
    491 	unsigned sel:9;
    492 	unsigned chan:2;
    493 	unsigned neg:1;
    494 	unsigned abs:1;
    495 	unsigned rel:1;
    496 	literal value;
    497 };
    498 
    499 struct bc_alu {
    500 	const alu_op_info * op_ptr;
    501 	unsigned op;
    502 
    503 	bc_alu_src src[3];
    504 
    505 	unsigned dst_gpr:7;
    506 	unsigned dst_chan:2;
    507 	unsigned dst_rel:1;
    508 	unsigned clamp:1;
    509 	unsigned omod:2;
    510 	unsigned bank_swizzle:3;
    511 
    512 	unsigned index_mode:3;
    513 	unsigned last:1;
    514 	unsigned pred_sel:2;
    515 
    516 	unsigned fog_merge:1;
    517 	unsigned write_mask:1;
    518 	unsigned update_exec_mask:1;
    519 	unsigned update_pred:1;
    520 
    521 	unsigned slot:3;
    522 
    523 	unsigned lds_idx_offset:6;
    524 
    525 	alu_op_flags slot_flags;
    526 
    527 	void set_op(unsigned op) {
    528 		this->op = op;
    529 		op_ptr = r600_isa_alu(op);
    530 	}
    531 };
    532 
    533 struct bc_fetch {
    534 	const fetch_op_info * op_ptr;
    535 	unsigned op;
    536 
    537 	unsigned bc_frac_mode:1;
    538 	unsigned fetch_whole_quad:1;
    539 	unsigned resource_id:8;
    540 
    541 	unsigned src_gpr:7;
    542 	unsigned src_rel:1;
    543 	unsigned src_rel_global:1; /* for GDS ops */
    544 	unsigned src_sel[4];
    545 
    546 	unsigned dst_gpr:7;
    547 	unsigned dst_rel:1;
    548 	unsigned dst_rel_global:1; /* for GDS ops */
    549 	unsigned dst_sel[4];
    550 
    551 	unsigned alt_const:1;
    552 
    553 	unsigned inst_mod:2;
    554 	unsigned resource_index_mode:2;
    555 	unsigned sampler_index_mode:2;
    556 
    557 	unsigned coord_type[4];
    558 	unsigned lod_bias:7;
    559 
    560 	unsigned offset[3];
    561 
    562 	unsigned sampler_id:5;
    563 
    564 
    565 	unsigned fetch_type:2;
    566 	unsigned mega_fetch_count:6;
    567 	unsigned coalesced_read:1;
    568 	unsigned structured_read:2;
    569 	unsigned lds_req:1;
    570 
    571 	unsigned data_format:6;
    572 	unsigned format_comp_all:1;
    573 	unsigned num_format_all:2;
    574 	unsigned semantic_id:8;
    575 	unsigned srf_mode_all:1;
    576 	unsigned use_const_fields:1;
    577 
    578 	unsigned const_buf_no_stride:1;
    579 	unsigned endian_swap:2;
    580 	unsigned mega_fetch:1;
    581 
    582 	unsigned src2_gpr:7; /* for GDS */
    583 	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
    584 };
    585 
    586 struct shader_stats {
    587 	unsigned	ndw;
    588 	unsigned	ngpr;
    589 	unsigned	nstack;
    590 
    591 	unsigned	cf; // clause instructions not included
    592 	unsigned	alu;
    593 	unsigned	alu_clauses;
    594 	unsigned	fetch_clauses;
    595 	unsigned	fetch;
    596 	unsigned	alu_groups;
    597 
    598 	unsigned	shaders;		// number of shaders (for accumulated stats)
    599 
    600 	shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
    601 			fetch_clauses(), fetch(), alu_groups(), shaders() {}
    602 
    603 	void collect(node *n);
    604 	void accumulate(shader_stats &s);
    605 	void dump();
    606 	void dump_diff(shader_stats &s);
    607 };
    608 
    609 class sb_context {
    610 
    611 public:
    612 
    613 	shader_stats src_stats, opt_stats;
    614 
    615 	r600_isa *isa;
    616 
    617 	sb_hw_chip hw_chip;
    618 	sb_hw_class hw_class;
    619 
    620 	unsigned alu_temp_gprs;
    621 	unsigned max_fetch;
    622 	bool has_trans;
    623 	unsigned vtx_src_num;
    624 	unsigned num_slots;
    625 	bool uses_mova_gpr;
    626 
    627 	bool r6xx_gpr_index_workaround;
    628 
    629 	bool stack_workaround_8xx;
    630 	bool stack_workaround_9xx;
    631 
    632 	unsigned wavefront_size;
    633 	unsigned stack_entry_size;
    634 
    635 	static unsigned dump_pass;
    636 	static unsigned dump_stat;
    637 
    638 	static unsigned dry_run;
    639 	static unsigned no_fallback;
    640 	static unsigned safe_math;
    641 
    642 	static unsigned dskip_start;
    643 	static unsigned dskip_end;
    644 	static unsigned dskip_mode;
    645 
    646 	sb_context() : src_stats(), opt_stats(), isa(0),
    647 			hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN) {}
    648 
    649 	int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
    650 
    651 	bool is_r600() {return hw_class == HW_CLASS_R600;}
    652 	bool is_r700() {return hw_class == HW_CLASS_R700;}
    653 	bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
    654 	bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
    655 	bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
    656 
    657 	bool needs_8xx_stack_workaround() {
    658 		if (!is_evergreen())
    659 			return false;
    660 
    661 		switch (hw_chip) {
    662 		case HW_CHIP_CYPRESS:
    663 		case HW_CHIP_JUNIPER:
    664 			return false;
    665 		default:
    666 			return true;
    667 		}
    668 	}
    669 
    670 	bool needs_9xx_stack_workaround() {
    671 		return is_cayman();
    672 	}
    673 
    674 	sb_hw_class_bits hw_class_bit() {
    675 		switch (hw_class) {
    676 		case HW_CLASS_R600:return HB_R6;
    677 		case HW_CLASS_R700:return HB_R7;
    678 		case HW_CLASS_EVERGREEN:return HB_EG;
    679 		case HW_CLASS_CAYMAN:return HB_CM;
    680 		default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
    681 
    682 		}
    683 	}
    684 
    685 	unsigned cf_opcode(unsigned op) {
    686 		return r600_isa_cf_opcode(isa->hw_class, op);
    687 	}
    688 
    689 	unsigned alu_opcode(unsigned op) {
    690 		return r600_isa_alu_opcode(isa->hw_class, op);
    691 	}
    692 
    693 	unsigned alu_slots(unsigned op) {
    694 		return r600_isa_alu_slots(isa->hw_class, op);
    695 	}
    696 
    697 	unsigned alu_slots(const alu_op_info * op_ptr) {
    698 		return op_ptr->slots[isa->hw_class];
    699 	}
    700 
    701 	unsigned alu_slots_mask(const alu_op_info * op_ptr) {
    702 		unsigned mask = 0;
    703 		unsigned slot_flags = alu_slots(op_ptr);
    704 		if (slot_flags & AF_V)
    705 			mask = 0x0F;
    706 		if (!is_cayman() && (slot_flags & AF_S))
    707 			mask |= 0x10;
    708 		return mask;
    709 	}
    710 
    711 	unsigned fetch_opcode(unsigned op) {
    712 		return r600_isa_fetch_opcode(isa->hw_class, op);
    713 	}
    714 
    715 	bool is_kcache_sel(unsigned sel) {
    716 		return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
    717 	}
    718 
    719 	const char * get_hw_class_name();
    720 	const char * get_hw_chip_name();
    721 
    722 };
    723 
    724 #define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
    725 #define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
    726 
    727 class bc_decoder {
    728 
    729 	sb_context &ctx;
    730 
    731 	uint32_t* dw;
    732 	unsigned ndw;
    733 
    734 public:
    735 
    736 	bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
    737 		: ctx(sctx), dw(data), ndw(size) {}
    738 
    739 	int decode_cf(unsigned &i, bc_cf &bc);
    740 	int decode_alu(unsigned &i, bc_alu &bc);
    741 	int decode_fetch(unsigned &i, bc_fetch &bc);
    742 
    743 private:
    744 	int decode_cf_alu(unsigned &i, bc_cf &bc);
    745 	int decode_cf_exp(unsigned &i, bc_cf &bc);
    746 	int decode_cf_mem(unsigned &i, bc_cf &bc);
    747 
    748 	int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
    749 	int decode_fetch_gds(unsigned &i, bc_fetch &bc);
    750 };
    751 
    752 // bytecode format definition
    753 
    754 class hw_encoding_format {
    755 	const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
    756 	hw_encoding_format();
    757 protected:
    758 	uint32_t value;
    759 public:
    760 	hw_encoding_format(sb_hw_class_bits hw)
    761 		: hw_target(hw), value(0) {}
    762 	hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
    763 		: hw_target(hw), value(v) {}
    764 	uint32_t get_value(sb_hw_class_bits hw) const {
    765 		assert((hw & hw_target) == hw);
    766 		return value;
    767 	}
    768 };
    769 
    770 #define BC_FORMAT_BEGIN_HW(fmt, hwset) \
    771 class fmt##_##hwset : public hw_encoding_format {\
    772 	typedef fmt##_##hwset thistype; \
    773 public: \
    774 	fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
    775 	fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};
    776 
    777 #define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)
    778 
    779 #define BC_FORMAT_END(fmt) };
    780 
    781 // bytecode format field definition
    782 
    783 #define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
    784 	thistype & name(unsigned v) { \
    785 		value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
    786 		return *this; \
    787 	} \
    788 	unsigned get_##name() const { \
    789 		return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
    790 	} \
    791 
    792 #define BC_RSRVD(fmt, last_bit, first_bit)
    793 
    794 // CLAMP macro defined elsewhere interferes with bytecode field name
    795 #undef CLAMP
    796 #include "sb_bc_fmt_def.inc"
    797 
    798 #undef BC_FORMAT_BEGIN
    799 #undef BC_FORMAT_END
    800 #undef BC_FIELD
    801 #undef BC_RSRVD
    802 
    803 class bc_parser {
    804 	sb_context & ctx;
    805 
    806 	bc_decoder *dec;
    807 
    808 	r600_bytecode *bc;
    809 	r600_shader *pshader;
    810 
    811 	uint32_t *dw;
    812 	unsigned bc_ndw;
    813 
    814 	unsigned max_cf;
    815 
    816 	shader *sh;
    817 
    818 	int error;
    819 
    820 	alu_node *slots[2][5];
    821 	unsigned cgroup;
    822 
    823 	typedef std::vector<cf_node*> id_cf_map;
    824 	id_cf_map cf_map;
    825 
    826 	typedef std::stack<region_node*> region_stack;
    827 	region_stack loop_stack;
    828 
    829 	bool gpr_reladdr;
    830 
    831 	// Note: currently relies on input emitting SET_CF in same basic block as uses
    832 	value *cf_index_value[2];
    833 	alu_node *mova;
    834 public:
    835 
    836 	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
    837 		ctx(sctx), dec(), bc(bc), pshader(pshader),
    838 		dw(), bc_ndw(), max_cf(),
    839 		sh(), error(), slots(), cgroup(),
    840 		cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }
    841 
    842 	int decode();
    843 	int prepare();
    844 
    845 	shader* get_shader() { assert(!error); return sh; }
    846 
    847 private:
    848 
    849 	int decode_shader();
    850 
    851 	int parse_decls();
    852 
    853 	int decode_cf(unsigned &i, bool &eop);
    854 
    855 	int decode_alu_clause(cf_node *cf);
    856 	int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);
    857 
    858 	int decode_fetch_clause(cf_node *cf);
    859 
    860 	int prepare_ir();
    861 	int prepare_alu_clause(cf_node *cf);
    862 	int prepare_alu_group(cf_node* cf, alu_group_node *g);
    863 	int prepare_fetch_clause(cf_node *cf);
    864 
    865 	int prepare_loop(cf_node *c);
    866 	int prepare_if(cf_node *c);
    867 
    868 	void save_set_cf_index(value *val, unsigned idx);
    869 	value *get_cf_index_value(unsigned idx);
    870 	void save_mova(alu_node *mova);
    871 	alu_node *get_mova();
    872 };
    873 
    874 
    875 
    876 
    877 class bytecode {
    878 	typedef std::vector<uint32_t> bc_vector;
    879 	sb_hw_class_bits hw_class_bit;
    880 
    881 	bc_vector bc;
    882 
    883 	unsigned pos;
    884 
    885 public:
    886 
    887 	bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
    888 		: hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
    889 
    890 	unsigned ndw() { return bc.size(); }
    891 
    892 	void write_data(uint32_t* dst) {
    893 		std::copy(bc.begin(), bc.end(), dst);
    894 	}
    895 
    896 	void align(unsigned a) {
    897 		unsigned size = bc.size();
    898 		size = (size + a - 1) & ~(a-1);
    899 		bc.resize(size);
    900 	}
    901 
    902 	void set_size(unsigned sz) {
    903 		assert(sz >= bc.size());
    904 		bc.resize(sz);
    905 	}
    906 
    907 	void seek(unsigned p) {
    908 		if (p != pos) {
    909 			if (p > bc.size()) {
    910 				bc.resize(p);
    911 			}
    912 			pos = p;
    913 		}
    914 	}
    915 
    916 	unsigned get_pos() { return pos; }
    917 	uint32_t *data() { return &bc[0]; }
    918 
    919 	bytecode & operator <<(uint32_t v) {
    920 		if (pos == ndw()) {
    921 			bc.push_back(v);
    922 		} else
    923 			bc.at(pos) = v;
    924 		++pos;
    925 		return *this;
    926 	}
    927 
    928 	bytecode & operator <<(const hw_encoding_format &e) {
    929 		*this << e.get_value(hw_class_bit);
    930 		return *this;
    931 	}
    932 
    933 	bytecode & operator <<(const bytecode &b) {
    934 		bc.insert(bc.end(), b.bc.begin(), b.bc.end());
    935 		return *this;
    936 	}
    937 
    938 	uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
    939 };
    940 
    941 
    942 class bc_builder {
    943 	shader &sh;
    944 	sb_context &ctx;
    945 	bytecode bb;
    946 	int error;
    947 
    948 public:
    949 
    950 	bc_builder(shader &s);
    951 	int build();
    952 	bytecode& get_bytecode() { assert(!error); return bb; }
    953 
    954 private:
    955 
    956 	int build_cf(cf_node *n);
    957 
    958 	int build_cf_alu(cf_node *n);
    959 	int build_cf_mem(cf_node *n);
    960 	int build_cf_exp(cf_node *n);
    961 
    962 	int build_alu_clause(cf_node *n);
    963 	int build_alu_group(alu_group_node *n);
    964 	int build_alu(alu_node *n);
    965 
    966 	int build_fetch_clause(cf_node *n);
    967 	int build_fetch_tex(fetch_node *n);
    968 	int build_fetch_vtx(fetch_node *n);
    969 };
    970 
    971 } // namespace r600_sb
    972 
    973 #endif /* SB_BC_H_ */
    974