Home | History | Annotate | Download | only in radeonsi
      1 /*
      2  * Copyright 2012 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *	Tom Stellard <thomas.stellard (at) amd.com>
     25  *	Michel Dänzer <michel.daenzer (at) amd.com>
     26  *      Christian König <christian.koenig (at) amd.com>
     27  */
     28 
     29 /* How linking shader inputs and outputs between vertex, tessellation, and
     30  * geometry shaders works.
     31  *
     32  * Inputs and outputs between shaders are stored in a buffer. This buffer
     33  * lives in LDS (typical case for tessellation), but it can also live
     34  * in memory (ESGS). Each input or output has a fixed location within a vertex.
     35  * The highest used input or output determines the stride between vertices.
     36  *
     37  * Since GS and tessellation are only possible in the OpenGL core profile,
     38  * only these semantics are valid for per-vertex data:
     39  *
     40  *   Name             Location
     41  *
     42  *   POSITION         0
     43  *   PSIZE            1
     44  *   CLIPDIST0..1     2..3
     45  *   CULLDIST0..1     (not implemented)
     46  *   GENERIC0..31     4..35
     47  *
     48  * For example, a shader only writing GENERIC0 has the output stride of 5.
     49  *
     50  * Only these semantics are valid for per-patch data:
     51  *
     52  *   Name             Location
     53  *
     54  *   TESSOUTER        0
     55  *   TESSINNER        1
     56  *   PATCH0..29       2..31
     57  *
     58  * That's how independent shaders agree on input and output locations.
     59  * The si_shader_io_get_unique_index function assigns the locations.
     60  *
     61  * For tessellation, other required information for calculating the input and
     62  * output addresses like the vertex stride, the patch stride, and the offsets
     63  * where per-vertex and per-patch data start, is passed to the shader via
     64  * user data SGPRs. The offsets and strides are calculated at draw time and
     65  * aren't available at compile time.
     66  */
     67 
     68 #ifndef SI_SHADER_H
     69 #define SI_SHADER_H
     70 
     71 #include <llvm-c/Core.h> /* LLVMModuleRef */
     72 #include <llvm-c/TargetMachine.h>
     73 #include "tgsi/tgsi_scan.h"
     74 #include "util/u_queue.h"
     75 #include "si_state.h"
     76 
     77 struct radeon_shader_binary;
     78 struct radeon_shader_reloc;
     79 
     80 #define SI_MAX_VS_OUTPUTS	40
     81 
     82 /* SGPR user data indices.
         *
         * The first SI_NUM_RESOURCE_SGPRS entries are common to all shader stages;
         * every resource list occupies two consecutive slots (the entry and its
         * _HI companion). The per-stage groups that follow restart their numbering
         * at SI_NUM_RESOURCE_SGPRS or SI_ES_NUM_USER_SGPR, so constants belonging
         * to different stages share the same numeric values. Each *_NUM_USER_SGPR
         * constant is the number of user SGPRs of the corresponding stage.
         */
     83 enum {
     84 	SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
     85 	SI_SGPR_RW_BUFFERS_HI,
     86 	SI_SGPR_CONST_BUFFERS,
     87 	SI_SGPR_CONST_BUFFERS_HI,
     88 	SI_SGPR_SAMPLERS,  /* images & sampler states interleaved */
     89 	SI_SGPR_SAMPLERS_HI,
     90 	SI_SGPR_IMAGES,
     91 	SI_SGPR_IMAGES_HI,
     92 	SI_SGPR_SHADER_BUFFERS,
     93 	SI_SGPR_SHADER_BUFFERS_HI,
     94 	SI_NUM_RESOURCE_SGPRS, /* count of the common resource SGPRs above */
     95 
     96 	/* all VS variants (SI_ES_NUM_USER_SGPR is also the starting point of
        	 * the hw VS and hw LS groups below) */
     97 	SI_SGPR_VERTEX_BUFFERS	= SI_NUM_RESOURCE_SGPRS,
     98 	SI_SGPR_VERTEX_BUFFERS_HI,
     99 	SI_SGPR_BASE_VERTEX,
    100 	SI_SGPR_START_INSTANCE,
    101 	SI_SGPR_DRAWID,
    102 	SI_ES_NUM_USER_SGPR,
    103 
    104 	/* hw VS only (one extra SGPR after the common VS ones) */
    105 	SI_SGPR_VS_STATE_BITS	= SI_ES_NUM_USER_SGPR,
    106 	SI_VS_NUM_USER_SGPR,
    107 
    108 	/* hw LS only (uses the same slot as SI_SGPR_VS_STATE_BITS) */
    109 	SI_SGPR_LS_OUT_LAYOUT	= SI_ES_NUM_USER_SGPR,
    110 	SI_LS_NUM_USER_SGPR,
    111 
    112 	/* both TCS and TES */
    113 	SI_SGPR_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_SGPRS,
    114 	SI_TES_NUM_USER_SGPR,
    115 
    116 	/* TCS only (continues right after the SGPRs shared with TES) */
    117 	SI_SGPR_TCS_OUT_OFFSETS = SI_TES_NUM_USER_SGPR,
    118 	SI_SGPR_TCS_OUT_LAYOUT,
    119 	SI_SGPR_TCS_IN_LAYOUT,
    120 	SI_TCS_NUM_USER_SGPR,
    121 
    122 	/* GS limits: the GS has only the common resource SGPRs, and the GS
        	 * copy shader only the RW buffers pair. */
    123 	SI_GS_NUM_USER_SGPR = SI_NUM_RESOURCE_SGPRS,
    124 	SI_GSCOPY_NUM_USER_SGPR = SI_SGPR_RW_BUFFERS_HI + 1,
    125 
    126 	/* PS only */
    127 	SI_SGPR_ALPHA_REF	= SI_NUM_RESOURCE_SGPRS,
    128 	SI_PS_NUM_USER_SGPR,
    129 
    130 	/* CS only: grid size and block size take 3 SGPRs each
        	 * (presumably x, y, z -- confirm against the compute code) */
    131 	SI_SGPR_GRID_SIZE = SI_NUM_RESOURCE_SGPRS,
    132 	SI_SGPR_BLOCK_SIZE = SI_SGPR_GRID_SIZE + 3,
    133 	SI_CS_NUM_USER_SGPR = SI_SGPR_BLOCK_SIZE + 3
    134 };
    135 
    136 /* LLVM function parameter indices.
         *
         * The first SI_NUM_RESOURCE_PARAMS entries are common to all shader types.
         * Every per-stage group below restarts at SI_NUM_RESOURCE_PARAMS, so
         * constants from different groups share the same numeric values.
         */
    137 enum {
    138 	SI_PARAM_RW_BUFFERS,
    139 	SI_PARAM_CONST_BUFFERS,
    140 	SI_PARAM_SAMPLERS,
    141 	SI_PARAM_IMAGES,
    142 	SI_PARAM_SHADER_BUFFERS,
    143 	SI_NUM_RESOURCE_PARAMS,
    144 
    145 	/* VS only parameters */
    146 	SI_PARAM_VERTEX_BUFFERS	= SI_NUM_RESOURCE_PARAMS,
    147 	SI_PARAM_BASE_VERTEX,
    148 	SI_PARAM_START_INSTANCE,
    149 	SI_PARAM_DRAWID,
    150 	/* [0] = clamp vertex color, VS as VS only */
    151 	SI_PARAM_VS_STATE_BITS,
    152 	/* same value as TCS_IN_LAYOUT, VS as LS only.
        	 * This index is identical to SI_PARAM_VS_STATE_BITS (DRAWID + 1). */
    153 	SI_PARAM_LS_OUT_LAYOUT = SI_PARAM_DRAWID + 1,
    154 	/* the other VS parameters are assigned dynamically */
    155 
    156 	/* Layout of TCS outputs in the offchip buffer
    157 	 *   [0:8] = the number of patches per threadgroup.
    158 	 *   [9:15] = the number of output vertices per patch.
    159 	 *   [16:31] = the offset of per patch attributes in the buffer in bytes.
    160 	 */
    161 	SI_PARAM_TCS_OFFCHIP_LAYOUT = SI_NUM_RESOURCE_PARAMS, /* for TCS & TES */
    162 
    163 	/* TCS only parameters. */
    164 
    165 	/* Offsets where TCS outputs and TCS patch outputs live in LDS:
    166 	 *   [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
    167 	 *   [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32 + 32*32
    168 	 */
    169 	SI_PARAM_TCS_OUT_OFFSETS,
    170 
    171 	/* Layout of TCS outputs / TES inputs:
    172 	 *   [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
    173 	 *   [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
        	 *             NOTE(review): for an output stride this presumably means
        	 *             num_outputs * 4, matching the [0:12] field -- confirm.
    174 	 *   [26:31] = gl_PatchVerticesIn, max = 32
    175 	 */
    176 	SI_PARAM_TCS_OUT_LAYOUT,
    177 
    178 	/* Layout of LS outputs / TCS inputs
    179 	 *   [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
    180 	 *   [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
    181 	 */
    182 	SI_PARAM_TCS_IN_LAYOUT,
    183 
    184 	SI_PARAM_TCS_OC_LDS,
    185 	SI_PARAM_TESS_FACTOR_OFFSET,
    186 	SI_PARAM_PATCH_ID,
    187 	SI_PARAM_REL_IDS,
    188 
    189 	/* GS only parameters */
    190 	SI_PARAM_GS2VS_OFFSET = SI_NUM_RESOURCE_PARAMS,
    191 	SI_PARAM_GS_WAVE_ID,
    192 	SI_PARAM_VTX0_OFFSET,
    193 	SI_PARAM_VTX1_OFFSET,
    194 	SI_PARAM_PRIMITIVE_ID,
    195 	SI_PARAM_VTX2_OFFSET,
    196 	SI_PARAM_VTX3_OFFSET,
    197 	SI_PARAM_VTX4_OFFSET,
    198 	SI_PARAM_VTX5_OFFSET,
    199 	SI_PARAM_GS_INSTANCE_ID,
    200 
    201 	/* PS only parameters */
    202 	SI_PARAM_ALPHA_REF = SI_NUM_RESOURCE_PARAMS,
    203 	SI_PARAM_PRIM_MASK,
    204 	SI_PARAM_PERSP_SAMPLE,
    205 	SI_PARAM_PERSP_CENTER,
    206 	SI_PARAM_PERSP_CENTROID,
    207 	SI_PARAM_PERSP_PULL_MODEL,
    208 	SI_PARAM_LINEAR_SAMPLE,
    209 	SI_PARAM_LINEAR_CENTER,
    210 	SI_PARAM_LINEAR_CENTROID,
    211 	SI_PARAM_LINE_STIPPLE_TEX,
    212 	SI_PARAM_POS_X_FLOAT,
    213 	SI_PARAM_POS_Y_FLOAT,
    214 	SI_PARAM_POS_Z_FLOAT,
    215 	SI_PARAM_POS_W_FLOAT,
    216 	SI_PARAM_FRONT_FACE,
    217 	SI_PARAM_ANCILLARY,
    218 	SI_PARAM_SAMPLE_COVERAGE,
    219 	SI_PARAM_POS_FIXED_PT,
    220 
    221 	/* CS only parameters */
    222 	SI_PARAM_GRID_SIZE = SI_NUM_RESOURCE_PARAMS,
    223 	SI_PARAM_BLOCK_SIZE,
    224 	SI_PARAM_BLOCK_ID,
    225 	SI_PARAM_THREAD_ID,
    226 
        	/* Derived from the last PS parameter: presumably +1 to turn the
        	 * index into a count, plus the 8 extra slots noted below. */
    227 	SI_NUM_PARAMS = SI_PARAM_POS_FIXED_PT + 9, /* +8 for COLOR[0..1] */
    228 };
    229 
    230 /* SI-specific system values.
         *
         * Numbered from TGSI_SEMANTIC_COUNT upwards (presumably so that they never
         * collide with a regular TGSI semantic -- confirm in the TGSI headers).
         */
    231 enum {
    232 	TGSI_SEMANTIC_DEFAULT_TESSOUTER_SI = TGSI_SEMANTIC_COUNT,
    233 	TGSI_SEMANTIC_DEFAULT_TESSINNER_SI,
    234 };
    235 
    236 /* For VS shader key fix_fetch.
         *
         * si_shader_key.mono.vs.fix_fetch stores one of these per input in a
         * nibble, so the values here must stay within 0..15.
         */
    237 enum {
    238 	SI_FIX_FETCH_NONE = 0, /* zero, so a cleared key means "no fix-up" */
    239 	SI_FIX_FETCH_A2_SNORM,
    240 	SI_FIX_FETCH_A2_SSCALED,
    241 	SI_FIX_FETCH_A2_SINT,
    242 	SI_FIX_FETCH_RGBA_32_UNORM,
    243 	SI_FIX_FETCH_RGBX_32_UNORM,
    244 	SI_FIX_FETCH_RGBA_32_SNORM,
    245 	SI_FIX_FETCH_RGBX_32_SNORM,
    246 	SI_FIX_FETCH_RGBA_32_USCALED,
    247 	SI_FIX_FETCH_RGBA_32_SSCALED,
    248 	SI_FIX_FETCH_RGBA_32_FIXED,
    249 	SI_FIX_FETCH_RGBX_32_FIXED,
    250 };
    251 
    252 struct si_shader;
    253 
    254 /* State of the context creating the shader object.
         *
         * A copy of this is embedded both in struct si_shader_selector and in
         * struct si_shader.
         */
    255 struct si_compiler_ctx_state {
    256 	/* Should only be used by si_init_shader_selector_async and
    257 	 * si_build_shader_variant if thread_index == -1 (non-threaded). */
    258 	LLVMTargetMachineRef		tm;
    259 
    260 	/* Used if thread_index == -1 or if debug.async is true. */
    261 	struct pipe_debug_callback	debug;
    262 
    263 	/* Used for creating the log string for gallium/ddebug. */
    264 	bool				is_debug_context;
    265 };
    266 
    267 /* A shader selector is a gallium CSO and contains shader variants and
    268  * binaries for one TGSI program. This can be shared by multiple contexts.
    269  */
    270 struct si_shader_selector {
    271 	struct si_screen	*screen;
        	/* NOTE(review): presumably signalled once asynchronous
        	 * initialization of the selector has finished -- confirm. */
    272 	struct util_queue_fence ready;
    273 	struct si_compiler_ctx_state compiler_ctx_state;
    274 
        	/* NOTE(review): presumably protects the variant list below, since
        	 * a selector can be shared by multiple contexts -- confirm. */
    275 	pipe_mutex		mutex;
    276 	struct si_shader	*first_variant; /* immutable after the first variant */
    277 	struct si_shader	*last_variant; /* mutable */
    278 
    279 	/* The compiled TGSI shader expecting a prolog and/or epilog (not
    280 	 * uploaded to a buffer).
    281 	 */
    282 	struct si_shader	*main_shader_part;
    283 
    284 	struct si_shader	*gs_copy_shader;
    285 
        	/* The TGSI program and the information gathered about it. */
    286 	struct tgsi_token       *tokens;
    287 	struct pipe_stream_output_info  so;
    288 	struct tgsi_shader_info		info;
    289 
    290 	/* PIPE_SHADER_[VERTEX|FRAGMENT|...] */
    291 	unsigned	type;
    292 
    293 	/* GS parameters. */
    294 	unsigned	esgs_itemsize;
    295 	unsigned	gs_input_verts_per_prim;
    296 	unsigned	gs_output_prim;
    297 	unsigned	gs_max_out_vertices;
    298 	unsigned	gs_num_invocations;
    299 	unsigned	max_gs_stream; /* count - 1 */
    300 	unsigned	gsvs_vertex_size;
    301 	unsigned	max_gsvs_emit_size;
    302 
    303 	/* PS parameters. */
    304 	unsigned	color_attr_index[2];
    305 	unsigned	db_shader_control;
    306 	/* Set 0xf or 0x0 (4 bits) per each written output.
    307 	 * ANDed with spi_shader_col_format.
    308 	 */
    309 	unsigned	colors_written_4bit;
    310 
    311 	/* CS parameters */
    312 	unsigned local_size;
    313 
        	/* Bit masks of outputs written / inputs read; bit positions are the
        	 * values returned by si_shader_io_get_unique_index[2]. */
    314 	uint64_t	outputs_written;	/* "get_unique_index" bits */
    315 	uint32_t	patch_outputs_written;	/* "get_unique_index" bits */
    316 	uint32_t	outputs_written2;	/* "get_unique_index2" bits */
    317 
    318 	uint64_t	inputs_read;		/* "get_unique_index" bits */
    319 	uint32_t	inputs_read2;		/* "get_unique_index2" bits */
    320 };
    321 
    322 /* Valid shader configurations:
    323  *
    324  * API shaders       VS | TCS | TES | GS |pass| PS
    325  * are compiled as:     |     |     |    |thru|
    326  *                      |     |     |    |    |
    327  * Only VS & PS:     VS | --  | --  | -- | -- | PS
    328  * With GS:          ES | --  | --  | GS | VS | PS
    329  * With Tessel.:     LS | HS  | VS  | -- | -- | PS
    330  * With both:        LS | HS  | ES  | GS | VS | PS
    331  */
    332 
    333 /* Common VS bits between the shader key and the prolog key.
         * Embedded as si_shader_key.part.vs.prolog and as
         * si_shader_part_key.vs_prolog.states.
         */
    334 struct si_vs_prolog_bits {
        	/* One entry per vertex buffer slot. */
    335 	unsigned	instance_divisors[SI_NUM_VERTEX_BUFFERS];
    336 };
    337 
    338 /* Common VS bits between the shader key and the epilog key.
         * Also used for the TES part of the shader key (part.tes.epilog).
         */
    339 struct si_vs_epilog_bits {
    340 	unsigned	export_prim_id:1; /* when PS needs it and GS is disabled */
    341 };
    342 
    343 /* Common TCS bits between the shader key and the epilog key.
         * Embedded as si_shader_key.part.tcs.epilog and as
         * si_shader_part_key.tcs_epilog.states.
         */
    344 struct si_tcs_epilog_bits {
        	/* 3-bit field; presumably the tessellation primitive mode -- confirm. */
    345 	unsigned	prim_mode:3;
    346 };
    347 
    348 struct si_gs_prolog_bits {
        	/* GS bits shared by the shader key (part.gs.prolog) and the prolog
        	 * key (gs_prolog.states). */
    349 	unsigned	tri_strip_adj_fix:1;
    350 };
    351 
    352 /* Common PS bits between the shader key and the prolog key.
         * Embedded as si_shader_key.part.ps.prolog and as
         * si_shader_part_key.ps_prolog.states. All members are 1-bit flags.
         */
    353 struct si_ps_prolog_bits {
    354 	unsigned	color_two_side:1;
    355 	unsigned	flatshade_colors:1;
    356 	unsigned	poly_stipple:1;
    357 	unsigned	force_persp_sample_interp:1;
    358 	unsigned	force_linear_sample_interp:1;
    359 	unsigned	force_persp_center_interp:1;
    360 	unsigned	force_linear_center_interp:1;
    361 	unsigned	bc_optimize_for_persp:1;
    362 	unsigned	bc_optimize_for_linear:1;
    363 };
    364 
    365 /* Common PS bits between the shader key and the epilog key.
         * Embedded as si_shader_key.part.ps.epilog and as
         * si_shader_part_key.ps_epilog.states.
         */
    366 struct si_ps_epilog_bits {
        	/* The selector's colors_written_4bit mask is ANDed with this. */
    367 	unsigned	spi_shader_col_format;
        	/* 8-bit masks; presumably one bit per color buffer -- confirm. */
    368 	unsigned	color_is_int8:8;
    369 	unsigned	color_is_int10:8;
    370 	unsigned	last_cbuf:3;
    371 	unsigned	alpha_func:3;
    372 	unsigned	alpha_to_one:1;
    373 	unsigned	poly_line_smoothing:1;
    374 	unsigned	clamp_color:1;
    375 };
    376 
    377 union si_shader_part_key {
        	/* Key describing one shader part (prolog or epilog); there is one
        	 * member per kind of part. The "states" members are the bits shared
        	 * with struct si_shader_key.
        	 */
    378 	struct {
    379 		struct si_vs_prolog_bits states;
    380 		unsigned	num_input_sgprs:5;
    381 		unsigned	last_input:4;
    382 	} vs_prolog;
    383 	struct {
    384 		struct si_vs_epilog_bits states;
    385 		unsigned	prim_id_param_offset:5;
    386 	} vs_epilog;
    387 	struct {
    388 		struct si_tcs_epilog_bits states;
    389 	} tcs_epilog;
    390 	struct {
    391 		struct si_gs_prolog_bits states;
    392 	} gs_prolog;
    393 	struct {
    394 		struct si_ps_prolog_bits states;
    395 		unsigned	num_input_sgprs:5;
    396 		unsigned	num_input_vgprs:5;
    397 		/* Color interpolation and two-side color selection. */
    398 		unsigned	colors_read:8; /* color input components read */
    399 		unsigned	num_interp_inputs:5; /* BCOLOR is at this location */
    400 		unsigned	face_vgpr_index:5;
    401 		unsigned	wqm:1;
    402 		char		color_attr_index[2];
        		/* Explicitly signed: -1 is a valid value here, and the
        		 * signedness of plain char is implementation-defined
        		 * (it is unsigned on some ABIs, where -1 would read
        		 * back as 255). Size and layout are unchanged.
        		 */
    403 		signed char	color_interp_vgpr_index[2]; /* -1 == constant */
    404 	} ps_prolog;
    405 	struct {
    406 		struct si_ps_epilog_bits states;
    407 		unsigned	colors_written:8;
    408 		unsigned	writes_z:1;
    409 		unsigned	writes_stencil:1;
    410 		unsigned	writes_samplemask:1;
    411 	} ps_epilog;
    412 };
    413 
    414 struct si_shader_key {
        	/* Key identifying one variant of a shader; stored in
        	 * struct si_shader (member "key").
        	 */
    415 	/* Prolog and epilog flags. */
    416 	union {
    417 		struct {
    418 			struct si_ps_prolog_bits prolog;
    419 			struct si_ps_epilog_bits epilog;
    420 		} ps;
    421 		struct {
    422 			struct si_vs_prolog_bits prolog;
    423 			struct si_vs_epilog_bits epilog;
    424 		} vs;
    425 		struct {
    426 			struct si_tcs_epilog_bits epilog;
    427 		} tcs; /* tessellation control shader */
    428 		struct {
    429 			struct si_vs_epilog_bits epilog; /* same as VS */
    430 		} tes; /* tessellation evaluation shader */
    431 		struct {
    432 			struct si_gs_prolog_bits prolog;
    433 		} gs;
    434 	} part;
    435 
    436 	/* These two are initially set according to the NEXT_SHADER property,
    437 	 * or guessed if the property doesn't seem correct.
    438 	 */
    439 	unsigned as_es:1; /* export shader */
    440 	unsigned as_ls:1; /* local shader */
    441 
    442 	/* Flags for monolithic compilation only. */
    443 	union {
    444 		struct {
    445 			/* One nibble for every input: SI_FIX_FETCH_* enums. */
    446 			uint64_t	fix_fetch;
    447 		} vs;
    448 		struct {
    449 			uint64_t	inputs_to_copy; /* for fixed-func TCS */
    450 		} tcs;
    451 	} mono;
    452 
    453 	/* Optimization flags for asynchronous compilation only. */
    454 	union {
    455 		struct {
    456 			uint64_t	kill_outputs; /* "get_unique_index" bits */
    457 			uint32_t	kill_outputs2; /* "get_unique_index2" bits */
    458 			unsigned	clip_disable:1;
    459 		} hw_vs; /* HW VS (it can be VS, TES, GS) */
    460 	} opt;
    461 };
    462 
    463 struct si_shader_config {
        	/* Register usage and configuration values of one shader binary.
        	 * Stored in struct si_shader and struct si_shader_part, and passed
        	 * as the "conf" argument to si_compile_llvm and
        	 * si_shader_binary_read_config.
        	 */
    464 	unsigned			num_sgprs;
    465 	unsigned			num_vgprs;
    466 	unsigned			spilled_sgprs;
    467 	unsigned			spilled_vgprs;
    468 	unsigned			private_mem_vgprs;
    469 	unsigned			lds_size;
    470 	unsigned			spi_ps_input_ena;
    471 	unsigned			spi_ps_input_addr;
    472 	unsigned			float_mode;
    473 	unsigned			scratch_bytes_per_wave;
    474 	unsigned			rsrc1;
    475 	unsigned			rsrc2;
    476 };
    477 
    477 enum {
        	/* Encodings for an exported parameter: 0..31 are parameter
        	 * offsets, 64..67 select a default value, 255 means undefined.
        	 * All values fit in one byte (presumably what
        	 * si_shader_info.vs_output_param_offset stores -- confirm).
        	 */
    478 	/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
    479 	EXP_PARAM_OFFSET_0 = 0,
    480 	EXP_PARAM_OFFSET_31 = 31,
    481 	/* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
    482 	EXP_PARAM_DEFAULT_VAL_0000 = 64,
    483 	EXP_PARAM_DEFAULT_VAL_0001,
    484 	EXP_PARAM_DEFAULT_VAL_1110,
    485 	EXP_PARAM_DEFAULT_VAL_1111,
    486 	EXP_PARAM_UNDEFINED = 255,
    487 };
    489 
    490 /* GCN-specific shader info. */
    491 struct si_shader_info {
        	/* One byte per VS output; presumably holds EXP_PARAM_* values
        	 * (all of which fit in a byte) -- confirm against the users.
        	 */
    492 	ubyte			vs_output_param_offset[SI_MAX_VS_OUTPUTS];
    493 	ubyte			num_input_sgprs;
    494 	ubyte			num_input_vgprs;
        	/* A numeric index, not a character. Explicitly signed because the
        	 * signedness of plain char is implementation-defined; a negative
        	 * "not present" value (presumably -1 -- confirm in si_shader.c)
        	 * would otherwise read back as 255 on ABIs where char is
        	 * unsigned. Size and layout are unchanged.
        	 */
    495 	signed char		face_vgpr_index;
    496 	bool			uses_instanceid;
    497 	ubyte			nr_pos_exports;
    498 	ubyte			nr_param_exports;
    499 };
    500 
    501 struct si_shader {
        	/* One compiled shader. A selector references objects of this type
        	 * as its variants (first_variant/last_variant), as its
        	 * main_shader_part and as its gs_copy_shader.
        	 */
    502 	struct si_compiler_ctx_state	compiler_ctx_state;
    503 
    504 	struct si_shader_selector	*selector;
        	/* Next entry in the selector's list of variants. */
    505 	struct si_shader		*next_variant;
    506 
        	/* Separately compiled parts attached to this shader. */
    507 	struct si_shader_part		*prolog;
    508 	struct si_shader_part		*epilog;
    509 
    510 	struct si_pm4_state		*pm4;
    511 	struct r600_resource		*bo;
    512 	struct r600_resource		*scratch_bo;
    513 	struct si_shader_key		key;
    514 	struct util_queue_fence		optimized_ready;
    515 	bool				compilation_failed;
    516 	bool				is_monolithic;
    517 	bool				is_optimized;
    518 	bool				is_binary_shared;
    519 	bool				is_gs_copy_shader;
    520 
    521 	/* The following data is all that's needed for binary shaders. */
    522 	struct radeon_shader_binary	binary;
    523 	struct si_shader_config		config;
    524 	struct si_shader_info		info;
    525 
    526 	/* Shader key + LLVM IR + disassembly + statistics.
    527 	 * Generated for debug contexts only.
    528 	 */
    529 	char				*shader_log;
    530 	size_t				shader_log_size;
    531 };
    532 
    533 struct si_shader_part {
        	/* A compiled prolog or epilog, identified by "key". struct si_shader
        	 * points to these through its prolog/epilog members.
        	 */
        	/* NOTE(review): presumably links the parts into a singly linked
        	 * list -- confirm where the list head lives. */
    534 	struct si_shader_part *next;
    535 	union si_shader_part_key key;
    536 	struct radeon_shader_binary binary;
    537 	struct si_shader_config config;
    538 };
    539 
    540 /* si_shader.c
         *
         * The functions returning int presumably return 0 on success and a
         * non-zero error code otherwise -- confirm in si_shader.c.
         */
    541 struct si_shader *
    542 si_generate_gs_copy_shader(struct si_screen *sscreen,
    543 			   LLVMTargetMachineRef tm,
    544 			   struct si_shader_selector *gs_selector,
    545 			   struct pipe_debug_callback *debug);
    546 int si_compile_tgsi_shader(struct si_screen *sscreen,
    547 			   LLVMTargetMachineRef tm,
    548 			   struct si_shader *shader,
    549 			   bool is_monolithic,
    550 			   struct pipe_debug_callback *debug);
    551 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
    552 		     struct si_shader *shader,
    553 		     struct pipe_debug_callback *debug);
    554 int si_compile_llvm(struct si_screen *sscreen,
    555 		    struct radeon_shader_binary *binary,
    556 		    struct si_shader_config *conf,
    557 		    LLVMTargetMachineRef tm,
    558 		    LLVMModuleRef mod,
    559 		    struct pipe_debug_callback *debug,
    560 		    unsigned processor,
    561 		    const char *name);
    562 void si_shader_destroy(struct si_shader *shader);
        /* These assign the fixed input/output locations described at the top of
         * this file; the results are used as bit positions in the
         * "get_unique_index" / "get_unique_index2" masks. */
    563 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
    564 unsigned si_shader_io_get_unique_index2(unsigned name, unsigned index);
    565 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
        /* NOTE(review): FILE needs <stdio.h>, which this header does not include
         * directly; presumably it arrives through one of the includes above --
         * confirm. */
    566 void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
    567 		    struct pipe_debug_callback *debug, unsigned processor,
    568 		    FILE *f, bool check_debug_option);
        /* Takes the LDS size by pointer (presumably adjusted in place -- confirm). */
    569 void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
    570 				      unsigned *lds_size);
    571 void si_shader_apply_scratch_relocs(struct si_context *sctx,
    572 			struct si_shader *shader,
    573 			struct si_shader_config *config,
    574 			uint64_t scratch_va);
    575 void si_shader_binary_read_config(struct radeon_shader_binary *binary,
    576 				  struct si_shader_config *conf,
    577 				  unsigned symbol_offset);
        /* The three flags correspond to the writes_z, writes_stencil and
         * writes_samplemask bits of the PS epilog key. */
    578 unsigned si_get_spi_shader_z_format(bool writes_z, bool writes_stencil,
    579 				    bool writes_samplemask);
    580 
    581 #endif
    582