Home | History | Annotate | Download | only in radeonsi
      1 /*
      2  * Copyright 2016 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #ifndef SI_SHADER_PRIVATE_H
     25 #define SI_SHADER_PRIVATE_H
     26 
     27 #include "si_shader.h"
     28 #include "gallivm/lp_bld_flow.h"
     29 #include "gallivm/lp_bld_init.h"
     30 #include "gallivm/lp_bld_tgsi.h"
     31 #include "tgsi/tgsi_parse.h"
     32 #include "ac_shader_abi.h"
     33 #include "ac_llvm_util.h"
     34 #include "ac_llvm_build.h"
     35 
     36 #include <llvm-c/Core.h>
     37 #include <llvm-c/TargetMachine.h>
     38 
     39 struct pipe_debug_callback;
     40 struct ac_shader_binary;
     41 
     42 #define RADEON_LLVM_MAX_INPUT_SLOTS 32
     43 #define RADEON_LLVM_MAX_INPUTS 32 * 4
     44 #define RADEON_LLVM_MAX_OUTPUTS 32 * 4
     45 
     46 #define RADEON_LLVM_MAX_SYSTEM_VALUES 11
     47 #define RADEON_LLVM_MAX_ADDRS 16
     48 
/**
 * Context structure passed to the si_llvm_* / si_nir_* functions declared in
 * this header.
 *
 * Layout constraints visible in this file:
 *  - bld_base must stay the first member, because si_shader_context() casts
 *    a struct lp_build_tgsi_context pointer directly to this type.
 *  - the member named "abi" is what si_shader_context_from_abi() uses to get
 *    back to the enclosing context.
 */
struct si_shader_context {
	struct lp_build_tgsi_context bld_base; /* must be first, see above */
	struct gallivm_state gallivm;
	struct ac_llvm_context ac;
	struct si_shader *shader;
	struct si_screen *screen;

	unsigned type; /* PIPE_SHADER_* specifies the type of shader. */

	/* For clamping the non-constant index in resource indexing: */
	unsigned num_const_buffers;
	unsigned num_shader_buffers;
	unsigned num_images;
	unsigned num_samplers;

	/* Whether the prolog will be compiled separately. */
	bool separate_prolog;

	/* Embedded by value; see si_shader_context_from_abi(). */
	struct ac_shader_abi abi;

	/** This function is responsible for initializing the inputs array and
	  * will be called once for each input declared in the TGSI shader.
	  */
	void (*load_input)(struct si_shader_context *,
			   unsigned input_index,
			   const struct tgsi_full_declaration *decl,
			   LLVMValueRef out[4]);

	/** This array contains the input values for the shader.  Typically these
	  * values will be in the form of a target intrinsic that will inform the
	  * backend how to load the actual inputs to the shader.
	  */
	struct tgsi_full_declaration input_decls[RADEON_LLVM_MAX_INPUT_SLOTS];
	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS];
	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS][TGSI_NUM_CHANNELS];
	LLVMValueRef addrs[RADEON_LLVM_MAX_ADDRS][TGSI_NUM_CHANNELS];

	/** This pointer holds the temporary values.
	  * The number of temporaries used in TGSI has no fixed upper bound, so
	  * this array must be allocated at runtime.
	  */
	LLVMValueRef *temps;
	unsigned temps_count;
	LLVMValueRef system_values[RADEON_LLVM_MAX_SYSTEM_VALUES];

	/* NOTE(review): presumably the immediates array and its element count
	 * (mirroring temps / temps_count) - confirm against the users. */
	LLVMValueRef *imms;
	unsigned imms_num;

	struct lp_build_if_state merged_wrap_if_state;

	struct tgsi_array_info *temp_arrays;
	LLVMValueRef *temp_array_allocas;

	LLVMValueRef undef_alloca;

	LLVMValueRef main_fn;
	LLVMTypeRef return_type;

	/* Parameter indices for LLVMGetParam. */
	int param_rw_buffers;
	int param_const_and_shader_buffers;
	int param_samplers_and_images;
	int param_bindless_samplers_and_images;
	/* Common inputs for merged shaders. */
	int param_merged_wave_info;
	int param_merged_scratch_offset;
	/* API VS */
	int param_vertex_buffers;
	int param_rel_auto_id;
	int param_vs_prim_id;
	int param_vertex_index0;
	/* VS states and layout of LS outputs / TCS inputs at the end
	 *   [0] = clamp vertex color
	 *   [1] = indexed
	 *   [8:20] = stride between patches in DW = num_inputs * num_vertices * 4
	 *            max = 32*32*4 + 32*4
	 *   [24:31] = stride between vertices in DW = num_inputs * 4
	 *             max = 32*4
	 */
	int param_vs_state_bits;
	int param_vs_blit_inputs;
	/* HW VS */
	int param_streamout_config;
	int param_streamout_write_index;
	int param_streamout_offset[4];

	/* API TCS & TES */
	/* Layout of TCS outputs in the offchip buffer
	 * # 6 bits
	 *   [0:5] = the number of patches per threadgroup, max = NUM_PATCHES (40)
	 * # 6 bits
	 *   [6:11] = the number of output vertices per patch, max = 32
	 * # 20 bits
	 *   [12:31] = the offset of per patch attributes in the buffer in bytes.
	 *             max = NUM_PATCHES*32*32*16
	 */
	int param_tcs_offchip_layout;

	/* API TCS */
	/* Offsets where TCS outputs and TCS patch outputs live in LDS:
	 *   [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
	 *   [16:31] = TCS output patch0 offset for per-patch / 16
	 *             max = (NUM_PATCHES + 1) * 32*32
	 */
	int param_tcs_out_lds_offsets;
	/* Layout of TCS outputs / TES inputs:
	 *   [0:12] = stride between output patches in DW, num_outputs * num_vertices * 4
	 *            max = 32*32*4 + 32*4
	 *   [26:31] = gl_PatchVerticesIn, max = 32
	 */
	int param_tcs_out_lds_layout;
	int param_tcs_offchip_addr_base64k;
	int param_tcs_factor_addr_base64k;
	int param_tcs_offchip_offset;
	int param_tcs_factor_offset;

	/* API TES */
	int param_tes_u;
	int param_tes_v;
	int param_tes_rel_patch_id;
	/* HW ES */
	int param_es2gs_offset;
	/* API GS */
	int param_gs2vs_offset;
	int param_gs_wave_id; /* GFX6 */
	LLVMValueRef gs_vtx_offset[6]; /* in dwords (GFX6) */
	int param_gs_vtx01_offset; /* in dwords (GFX9) */
	int param_gs_vtx23_offset; /* in dwords (GFX9) */
	int param_gs_vtx45_offset; /* in dwords (GFX9) */
	/* CS */
	int param_grid_size;
	int param_block_size;
	int param_block_id[3];
	int param_thread_id;

	LLVMTargetMachineRef tm;

	unsigned range_md_kind;
	unsigned fpmath_md_kind;
	LLVMValueRef fpmath_md_2p5_ulp;

	/* Preloaded descriptors. */
	LLVMValueRef esgs_ring;
	LLVMValueRef gsvs_ring[4];

	LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
	LLVMValueRef gs_next_vertex[4];
	LLVMValueRef postponed_kill;
	LLVMValueRef return_value;

	/* LLVM type handles, named after the type each one refers to. */
	LLVMTypeRef voidt;
	LLVMTypeRef i1;
	LLVMTypeRef i8;
	LLVMTypeRef i32;
	LLVMTypeRef i64;
	LLVMTypeRef i128;
	LLVMTypeRef f32;
	LLVMTypeRef v2i32;
	LLVMTypeRef v4i32;
	LLVMTypeRef v4f32;
	LLVMTypeRef v8i32;

	/* NOTE(review): presumably the i32 constants 0 and 1 - confirm where
	 * they are initialized. */
	LLVMValueRef i32_0;
	LLVMValueRef i32_1;
};
    214 
    215 static inline struct si_shader_context *
    216 si_shader_context(struct lp_build_tgsi_context *bld_base)
    217 {
    218 	return (struct si_shader_context*)bld_base;
    219 }
    220 
    221 static inline struct si_shader_context *
    222 si_shader_context_from_abi(struct ac_shader_abi *abi)
    223 {
    224 	struct si_shader_context *ctx = NULL;
    225 	return container_of(abi, ctx, abi);
    226 }
    227 
/* NOTE(review): the summaries in this group are inferred from names and
 * signatures only; the definitions are not part of this header. */

/* Add the attribute called "name", with integer "value", to LLVM function F. */
void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value);

/* Compile LLVM module M using target machine tm, filling in *binary.
 * The meaning of the unsigned return value and the use of "debug" are not
 * visible here - check the definition before relying on them. */
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
			 LLVMTargetMachineRef tm,
			 struct pipe_debug_callback *debug);
    233 
/* NOTE(review): the summaries in this group are inferred from names and
 * signatures only; the definitions are not part of this header. */

/* Return the LLVM type that corresponds to a TGSI opcode type. */
LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
			  enum tgsi_opcode_type type);

/* Presumably casts "value" to the LLVM type matching the TGSI "type". */
LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
		     enum tgsi_opcode_type type, LLVMValueRef value);

/* Presumably restricts "index" to a range determined by "num" - confirm the
 * exact bound (inclusive or exclusive) in the definition. */
LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
				 LLVMValueRef index,
				 unsigned num);
    243 
/* Context setup and teardown.
 * NOTE(review): the summaries in this group are inferred from names and
 * signatures only; the definitions are not part of this header. */

/* Initialize ctx for the given screen and LLVM target machine. */
void si_llvm_context_init(struct si_shader_context *ctx,
			  struct si_screen *sscreen,
			  LLVMTargetMachineRef tm);
/* Associate ctx with the shader that is going to be translated. */
void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
			      struct si_shader *shader);

/* Create an LLVM function called "name" from num_return_elems return types
 * and ParamCount parameter types. */
void si_llvm_create_func(struct si_shader_context *ctx,
			 const char *name,
			 LLVMTypeRef *return_types, unsigned num_return_elems,
			 LLVMTypeRef *ParamTypes, unsigned ParamCount);

/* Release what ctx holds. */
void si_llvm_dispose(struct si_shader_context *ctx);

/* Run optimizations on the LLVM module belonging to ctx. */
void si_llvm_optimize_module(struct si_shader_context *ctx);
    258 
/* Register fetch, input load and store entry points.
 * NOTE(review): the summaries in this group are inferred from names and
 * signatures only; the definitions are not part of this header. */

/* Fetch a 64-bit value of LLVM type "type"; presumably ptr and ptr2 address
 * its two 32-bit halves - confirm which is low and which is high. */
LLVMValueRef si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
				      LLVMTypeRef type,
				      LLVMValueRef ptr,
				      LLVMValueRef ptr2);

/* Fetch one swizzled channel of TGSI source register "reg" as "type". */
LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				const struct tgsi_full_src_register *reg,
				enum tgsi_opcode_type type,
				unsigned swizzle);

/* TES input load taking the shader ABI pointer (NIR path, going by the
 * name). The parameters' exact meaning is defined by the caller's ABI. */
LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi,
				   LLVMValueRef vertex_index,
				   LLVMValueRef param_index,
				   unsigned const_index,
				   unsigned location,
				   unsigned driver_location,
				   unsigned component,
				   unsigned num_components,
				   bool is_patch,
				   bool is_compact,
				   bool load_input);

/* GS input load taking the shader ABI pointer; returns a value of LLVM type
 * "type" for the given input, vertex-offset parameter and swizzle. */
LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
				   unsigned input_index,
				   unsigned vtx_offset_param,
				   LLVMTypeRef type,
				   unsigned swizzle);

/* Store up to four channel values (dst) for destination "index" of the
 * TGSI instruction "inst". */
void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			const struct tgsi_full_instruction *inst,
			const struct tgsi_opcode_info *info,
			unsigned index,
			LLVMValueRef dst[4]);
    292 
    293 /* Combine these with & instead of |. */
    294 #define NOOP_WAITCNT 0xf7f
    295 #define LGKM_CNT 0x07f
    296 #define VM_CNT 0xf70
    297 
/* NOTE(review): the summaries in this group are inferred from names and
 * signatures only; the definitions are not part of this header. */

/* Build the index value for an indirect TGSI register access; addr_mul and
 * rel_index presumably scale and offset the address register value. */
LLVMValueRef si_get_indirect_index(struct si_shader_context *ctx,
				   const struct tgsi_ind_register *ind,
				   unsigned addr_mul, int rel_index);
/* Like si_get_indirect_index() by name, with an additional bound "num";
 * presumably the result is limited by si_llvm_bound_index() - confirm. */
LLVMValueRef si_get_bounded_indirect_index(struct si_shader_context *ctx,
					   const struct tgsi_ind_register *ind,
					   int rel_index, unsigned num);

/* Return an LLVM type built from elem_type and num_elements; the address
 * space or other qualifiers implied by "const" are not visible here. */
LLVMTypeRef si_const_array(LLVMTypeRef elem_type, int num_elements);
    306 
/* NOTE(review): the summaries in this group are inferred from names and
 * signatures only; the definitions are not part of this header. */

/* Set up the ALU-related and memory-related parts of the build context. */
void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base);
void si_shader_context_init_mem(struct si_shader_context *ctx);

/* Load the descriptor of kind "type" found at "index" in descriptor "list". */
LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
				  LLVMValueRef list, LLVMValueRef index,
				  enum ac_descriptor_type type);
/* Image variant of the above; dcc_off presumably requests a descriptor with
 * DCC disabled - confirm in the definition. */
LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
				LLVMValueRef list, LLVMValueRef index,
				enum ac_descriptor_type desc_type, bool dcc_off);

/* Handle the TGSI declaration "decl" for system value number "index". */
void si_load_system_value(struct si_shader_context *ctx,
			  unsigned index,
			  const struct tgsi_full_declaration *decl);
/* Handle a TGSI declaration of compute shader memory. */
void si_declare_compute_memory(struct si_shader_context *ctx,
			       const struct tgsi_full_declaration *decl);
    322 
/* NOTE(review): the summaries in this group are inferred from names and
 * signatures only; the definitions are not part of this header. */

/* Load vertex shader input "input_index"; the (up to) four channel values
 * are written to out[]. */
void si_llvm_load_input_vs(
	struct si_shader_context *ctx,
	unsigned input_index,
	LLVMValueRef out[4]);
/* Fragment shader counterpart of si_llvm_load_input_vs(). */
void si_llvm_load_input_fs(
	struct si_shader_context *ctx,
	unsigned input_index,
	LLVMValueRef out[4]);

/* Build LLVM IR for the NIR shader "nir" using ctx. The bool result
 * presumably signals success - confirm in the definition. */
bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir);

/* GS input load taking the shader ABI pointer (NIR path, going by the
 * name); returns a value of LLVM type "type". */
LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
				  unsigned location,
				  unsigned driver_location,
				  unsigned component,
				  unsigned num_components,
				  unsigned vertex_index,
				  unsigned const_index,
				  LLVMTypeRef type);
    342 
    343 #endif
    344