/*
 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE. */

     23 #ifndef RADEON_CODE_H
     24 #define RADEON_CODE_H
     25 
     26 #include <stdint.h>
     27 
     28 #define R300_PFS_MAX_ALU_INST     64
     29 #define R300_PFS_MAX_TEX_INST     32
     30 #define R300_PFS_MAX_TEX_INDIRECT 4
     31 #define R300_PFS_NUM_TEMP_REGS    32
     32 #define R300_PFS_NUM_CONST_REGS   32
     33 
     34 #define R400_PFS_MAX_ALU_INST     512
     35 #define R400_PFS_MAX_TEX_INST     512
     36 
     37 #define R500_PFS_MAX_INST         512
     38 #define R500_PFS_NUM_TEMP_REGS    128
     39 #define R500_PFS_NUM_CONST_REGS   256
     40 #define R500_PFS_MAX_BRANCH_DEPTH_FULL 32
     41 #define R500_PFS_MAX_BRANCH_DEPTH_PARTIAL 4
     42 
     43 /* The r500 maximum depth is not just for loops, but any combination of loops
     44  * and subroutine jumps. */
     45 #define R500_PVS_MAX_LOOP_DEPTH 8
     46 
     47 #define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
     48 
     49 enum {
     50 	/**
     51 	 * External constants are constants whose meaning is unknown to this
     52 	 * compiler. For example, a Mesa gl_program's constants are turned
     53 	 * into external constants.
     54 	 */
     55 	RC_CONSTANT_EXTERNAL = 0,
     56 
     57 	RC_CONSTANT_IMMEDIATE,
     58 
     59 	/**
     60 	 * Constant referring to state that is known by this compiler,
     61 	 * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
     62 	 */
     63 	RC_CONSTANT_STATE
     64 };
     65 
     66 enum {
     67 	RC_STATE_SHADOW_AMBIENT = 0,
     68 
     69 	RC_STATE_R300_WINDOW_DIMENSION,
     70 	RC_STATE_R300_TEXRECT_FACTOR,
     71 	RC_STATE_R300_TEXSCALE_FACTOR,
     72 	RC_STATE_R300_VIEWPORT_SCALE,
     73 	RC_STATE_R300_VIEWPORT_OFFSET
     74 };
     75 
     76 struct rc_constant {
     77 	unsigned Type:2; /**< RC_CONSTANT_xxx */
     78 	unsigned Size:3;
     79 
     80 	union {
     81 		unsigned External;
     82 		float Immediate[4];
     83 		unsigned State[2];
     84 	} u;
     85 };
     86 
     87 struct rc_constant_list {
     88 	struct rc_constant * Constants;
     89 	unsigned Count;
     90 
     91 	unsigned _Reserved;
     92 };
     93 
     94 void rc_constants_init(struct rc_constant_list * c);
     95 void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
     96 void rc_constants_destroy(struct rc_constant_list * c);
     97 unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
     98 unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
     99 unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
    100 unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
    101 void rc_constants_print(struct rc_constant_list * c);
    102 
    103 /**
    104  * Compare functions.
    105  *
    106  * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
    107  * the correct GL compare function.
    108  */
    109 typedef enum {
    110 	RC_COMPARE_FUNC_NEVER = 0,
    111 	RC_COMPARE_FUNC_LESS,
    112 	RC_COMPARE_FUNC_EQUAL,
    113 	RC_COMPARE_FUNC_LEQUAL,
    114 	RC_COMPARE_FUNC_GREATER,
    115 	RC_COMPARE_FUNC_NOTEQUAL,
    116 	RC_COMPARE_FUNC_GEQUAL,
    117 	RC_COMPARE_FUNC_ALWAYS
    118 } rc_compare_func;
    119 
    120 /**
    121  * Coordinate wrapping modes.
    122  *
    123  * These are not quite the same as their GL counterparts yet.
    124  */
    125 typedef enum {
    126 	RC_WRAP_NONE = 0,
    127 	RC_WRAP_REPEAT,
    128 	RC_WRAP_MIRRORED_REPEAT,
    129 	RC_WRAP_MIRRORED_CLAMP
    130 } rc_wrap_mode;
    131 
    132 /**
    133  * Stores state that influences the compilation of a fragment program.
    134  */
    135 struct r300_fragment_program_external_state {
    136 	struct {
    137 		/**
    138 		 * This field contains swizzle for some lowering passes
    139 		 * (shadow comparison, unorm->snorm conversion)
    140 		 */
    141 		unsigned texture_swizzle:12;
    142 
    143 		/**
    144 		 * If the sampler is used as a shadow sampler,
    145 		 * this field specifies the compare function.
    146 		 *
    147 		 * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
    148 		 * \sa rc_compare_func
    149 		 */
    150 		unsigned texture_compare_func : 3;
    151 
    152 		/**
    153 		 * No matter what the sampler type is,
    154 		 * this field turns it into a shadow sampler.
    155 		 */
    156 		unsigned compare_mode_enabled : 1;
    157 
    158 		/**
    159 		 * If the sampler will receive non-normalized coords,
    160 		 * this field is set. The scaling factor is given by
    161 		 * RC_STATE_R300_TEXRECT_FACTOR.
    162 		 */
    163 		unsigned non_normalized_coords : 1;
    164 
    165 		/**
    166 		 * This field specifies wrapping modes for the sampler.
    167 		 *
    168 		 * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
    169 		 * will be performed on the coordinates.
    170 		 */
    171 		unsigned wrap_mode : 3;
    172 
    173 		/**
    174 		 * The coords are scaled after applying the wrap mode emulation
    175 		 * and right before texture fetch. The scaling factor is given by
    176 		 * RC_STATE_R300_TEXSCALE_FACTOR. */
    177 		unsigned clamp_and_scale_before_fetch : 1;
    178 
    179 		/**
    180 		 * Fetch RGTC1_SNORM or LATC1_SNORM as UNORM and convert UNORM -> SNORM
    181 		 * in the shader.
    182 		 */
    183 		unsigned convert_unorm_to_snorm:1;
    184 	} unit[16];
    185 };
    186 
    187 
    188 
    189 struct r300_fragment_program_node {
    190 	int tex_offset; /**< first tex instruction */
    191 	int tex_end; /**< last tex instruction, relative to tex_offset */
    192 	int alu_offset; /**< first ALU instruction */
    193 	int alu_end; /**< last ALU instruction, relative to alu_offset */
    194 	int flags;
    195 };
    196 
    197 /**
    198  * Stores an R300 fragment program in its compiled-to-hardware form.
    199  */
    200 struct r300_fragment_program_code {
    201 	struct {
    202 		unsigned int length; /**< total # of texture instructions used */
    203 		uint32_t inst[R400_PFS_MAX_TEX_INST];
    204 	} tex;
    205 
    206 	struct {
    207 		unsigned int length; /**< total # of ALU instructions used */
    208 		struct {
    209 			uint32_t rgb_inst;
    210 			uint32_t rgb_addr;
    211 			uint32_t alpha_inst;
    212 			uint32_t alpha_addr;
    213 			uint32_t r400_ext_addr;
    214 		} inst[R400_PFS_MAX_ALU_INST];
    215 	} alu;
    216 
    217 	uint32_t config; /* US_CONFIG */
    218 	uint32_t pixsize; /* US_PIXSIZE */
    219 	uint32_t code_offset; /* US_CODE_OFFSET */
    220 	uint32_t r400_code_offset_ext; /* US_CODE_EXT */
    221 	uint32_t code_addr[4]; /* US_CODE_ADDR */
    222 	/*US_CODE_BANK.R390_MODE: Enables 512 instructions and 64 temporaries
    223 	 * for r400 cards */
    224 	unsigned int r390_mode:1;
    225 };
    226 
    227 
    228 struct r500_fragment_program_code {
    229 	struct {
    230 		uint32_t inst0;
    231 		uint32_t inst1;
    232 		uint32_t inst2;
    233 		uint32_t inst3;
    234 		uint32_t inst4;
    235 		uint32_t inst5;
    236 	} inst[R500_PFS_MAX_INST];
    237 
    238 	int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
    239 
    240 	int max_temp_idx;
    241 
    242 	uint32_t us_fc_ctrl;
    243 
    244 	uint32_t int_constants[32];
    245 	uint32_t int_constant_count;
    246 };
    247 
    248 struct rX00_fragment_program_code {
    249 	union {
    250 		struct r300_fragment_program_code r300;
    251 		struct r500_fragment_program_code r500;
    252 	} code;
    253 
    254 	unsigned writes_depth:1;
    255 
    256 	struct rc_constant_list constants;
    257 	unsigned *constants_remap_table;
    258 };
    259 
    260 
    261 #define R300_VS_MAX_ALU		256
    262 #define R300_VS_MAX_ALU_DWORDS  (R300_VS_MAX_ALU * 4)
    263 #define R500_VS_MAX_ALU	        1024
    264 #define R500_VS_MAX_ALU_DWORDS  (R500_VS_MAX_ALU * 4)
    265 #define R300_VS_MAX_TEMPS	32
    266 /* This is the max for all chipsets (r300-r500) */
    267 #define R300_VS_MAX_FC_OPS 16
    268 #define R300_VS_MAX_LOOP_DEPTH 1
    269 
    270 #define VSF_MAX_INPUTS 32
    271 #define VSF_MAX_OUTPUTS 32
    272 
    273 struct r300_vertex_program_code {
    274 	int length;
    275 	union {
    276 		uint32_t d[R500_VS_MAX_ALU_DWORDS];
    277 		float f[R500_VS_MAX_ALU_DWORDS];
    278 	} body;
    279 
    280 	int pos_end;
    281 	int num_temporaries;	/* Number of temp vars used by program */
    282 	int inputs[VSF_MAX_INPUTS];
    283 	int outputs[VSF_MAX_OUTPUTS];
    284 
    285 	struct rc_constant_list constants;
    286 	unsigned *constants_remap_table;
    287 
    288 	uint32_t InputsRead;
    289 	uint32_t OutputsWritten;
    290 
    291 	unsigned int num_fc_ops;
    292 	uint32_t fc_ops;
    293 	union {
    294 	        uint32_t r300[R300_VS_MAX_FC_OPS];
    295 		struct {
    296 			uint32_t lw;
    297 			uint32_t uw;
    298 		} r500[R300_VS_MAX_FC_OPS];
    299 	} fc_op_addrs;
    300 	int32_t fc_loop_index[R300_VS_MAX_FC_OPS];
    301 };
    302 
    303 #endif /* RADEON_CODE_H */
    304 
    305