Home | History | Annotate | Download | only in radeonsi
      1 /*
      2  * Copyright 2017 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "si_shader.h"
     25 #include "si_shader_internal.h"
     26 
     27 #include "ac_nir_to_llvm.h"
     28 
     29 #include "tgsi/tgsi_from_mesa.h"
     30 
     31 #include "compiler/nir/nir.h"
     32 #include "compiler/nir_types.h"
     33 
     34 
     35 static int
     36 type_size(const struct glsl_type *type)
     37 {
     38    return glsl_count_attribute_slots(type, false);
     39 }
     40 
     41 static void scan_instruction(struct tgsi_shader_info *info,
     42 			     nir_instr *instr)
     43 {
     44 	if (instr->type == nir_instr_type_alu) {
     45 		nir_alu_instr *alu = nir_instr_as_alu(instr);
     46 
     47 		switch (alu->op) {
     48 		case nir_op_fddx:
     49 		case nir_op_fddy:
     50 		case nir_op_fddx_fine:
     51 		case nir_op_fddy_fine:
     52 		case nir_op_fddx_coarse:
     53 		case nir_op_fddy_coarse:
     54 			info->uses_derivatives = true;
     55 			break;
     56 		default:
     57 			break;
     58 		}
     59 	} else if (instr->type == nir_instr_type_tex) {
     60 		nir_tex_instr *tex = nir_instr_as_tex(instr);
     61 
     62 		if (!tex->texture) {
     63 			info->samplers_declared |=
     64 				u_bit_consecutive(tex->sampler_index, 1);
     65 		}
     66 
     67 		switch (tex->op) {
     68 		case nir_texop_tex:
     69 		case nir_texop_txb:
     70 		case nir_texop_lod:
     71 			info->uses_derivatives = true;
     72 			break;
     73 		default:
     74 			break;
     75 		}
     76 	} else if (instr->type == nir_instr_type_intrinsic) {
     77 		nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
     78 
     79 		switch (intr->intrinsic) {
     80 		case nir_intrinsic_load_front_face:
     81 			info->uses_frontface = 1;
     82 			break;
     83 		case nir_intrinsic_load_instance_id:
     84 			info->uses_instanceid = 1;
     85 			break;
     86 		case nir_intrinsic_load_invocation_id:
     87 			info->uses_invocationid = true;
     88 			break;
     89 		case nir_intrinsic_load_vertex_id:
     90 			info->uses_vertexid = 1;
     91 			break;
     92 		case nir_intrinsic_load_vertex_id_zero_base:
     93 			info->uses_vertexid_nobase = 1;
     94 			break;
     95 		case nir_intrinsic_load_base_vertex:
     96 			info->uses_basevertex = 1;
     97 			break;
     98 		case nir_intrinsic_load_primitive_id:
     99 			info->uses_primid = 1;
    100 			break;
    101 		case nir_intrinsic_load_sample_mask_in:
    102 			info->reads_samplemask = true;
    103 			break;
    104 		case nir_intrinsic_load_tess_level_inner:
    105 		case nir_intrinsic_load_tess_level_outer:
    106 			info->reads_tess_factors = true;
    107 			break;
    108 		case nir_intrinsic_image_store:
    109 		case nir_intrinsic_image_atomic_add:
    110 		case nir_intrinsic_image_atomic_min:
    111 		case nir_intrinsic_image_atomic_max:
    112 		case nir_intrinsic_image_atomic_and:
    113 		case nir_intrinsic_image_atomic_or:
    114 		case nir_intrinsic_image_atomic_xor:
    115 		case nir_intrinsic_image_atomic_exchange:
    116 		case nir_intrinsic_image_atomic_comp_swap:
    117 		case nir_intrinsic_store_ssbo:
    118 		case nir_intrinsic_ssbo_atomic_add:
    119 		case nir_intrinsic_ssbo_atomic_imin:
    120 		case nir_intrinsic_ssbo_atomic_umin:
    121 		case nir_intrinsic_ssbo_atomic_imax:
    122 		case nir_intrinsic_ssbo_atomic_umax:
    123 		case nir_intrinsic_ssbo_atomic_and:
    124 		case nir_intrinsic_ssbo_atomic_or:
    125 		case nir_intrinsic_ssbo_atomic_xor:
    126 		case nir_intrinsic_ssbo_atomic_exchange:
    127 		case nir_intrinsic_ssbo_atomic_comp_swap:
    128 			info->writes_memory = true;
    129 			break;
    130 		default:
    131 			break;
    132 		}
    133 	}
    134 }
    135 
    136 void si_nir_scan_tess_ctrl(const struct nir_shader *nir,
    137 			   const struct tgsi_shader_info *info,
    138 			   struct tgsi_tessctrl_info *out)
    139 {
    140 	memset(out, 0, sizeof(*out));
    141 
    142 	if (nir->info.stage != MESA_SHADER_TESS_CTRL)
    143 		return;
    144 
    145 	/* Initial value = true. Here the pass will accumulate results from
    146 	 * multiple segments surrounded by barriers. If tess factors aren't
    147 	 * written at all, it's a shader bug and we don't care if this will be
    148 	 * true.
    149 	 */
    150 	out->tessfactors_are_def_in_all_invocs = true;
    151 
    152 	/* TODO: Implement scanning of tess factors, see tgsi backend. */
    153 }
    154 
    155 void si_nir_scan_shader(const struct nir_shader *nir,
    156 			struct tgsi_shader_info *info)
    157 {
    158 	nir_function *func;
    159 	unsigned i;
    160 
    161 	assert(nir->info.stage == MESA_SHADER_VERTEX ||
    162 	       nir->info.stage == MESA_SHADER_GEOMETRY ||
    163 	       nir->info.stage == MESA_SHADER_TESS_CTRL ||
    164 	       nir->info.stage == MESA_SHADER_TESS_EVAL ||
    165 	       nir->info.stage == MESA_SHADER_FRAGMENT);
    166 
    167 	info->processor = pipe_shader_type_from_mesa(nir->info.stage);
    168 	info->num_tokens = 2; /* indicate that the shader is non-empty */
    169 	info->num_instructions = 2;
    170 
    171 	if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
    172 		info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT] =
    173 			nir->info.tess.tcs_vertices_out;
    174 	}
    175 
    176 	if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
    177 		if (nir->info.tess.primitive_mode == GL_ISOLINES)
    178 			info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = PIPE_PRIM_LINES;
    179 		else
    180 			info->properties[TGSI_PROPERTY_TES_PRIM_MODE] = nir->info.tess.primitive_mode;
    181 
    182 		STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
    183 		STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
    184 			      PIPE_TESS_SPACING_FRACTIONAL_ODD);
    185 		STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
    186 			      PIPE_TESS_SPACING_FRACTIONAL_EVEN);
    187 
    188 		info->properties[TGSI_PROPERTY_TES_SPACING] = (nir->info.tess.spacing + 1) % 3;
    189 		info->properties[TGSI_PROPERTY_TES_VERTEX_ORDER_CW] = !nir->info.tess.ccw;
    190 		info->properties[TGSI_PROPERTY_TES_POINT_MODE] = nir->info.tess.point_mode;
    191 	}
    192 
    193 	if (nir->info.stage == MESA_SHADER_GEOMETRY) {
    194 		info->properties[TGSI_PROPERTY_GS_INPUT_PRIM] = nir->info.gs.input_primitive;
    195 		info->properties[TGSI_PROPERTY_GS_OUTPUT_PRIM] = nir->info.gs.output_primitive;
    196 		info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES] = nir->info.gs.vertices_out;
    197 		info->properties[TGSI_PROPERTY_GS_INVOCATIONS] = nir->info.gs.invocations;
    198 	}
    199 
    200 	i = 0;
    201 	uint64_t processed_inputs = 0;
    202 	unsigned num_inputs = 0;
    203 	nir_foreach_variable(variable, &nir->inputs) {
    204 		unsigned semantic_name, semantic_index;
    205 		unsigned attrib_count = glsl_count_attribute_slots(variable->type,
    206 								   nir->info.stage == MESA_SHADER_VERTEX);
    207 
    208 		/* Vertex shader inputs don't have semantics. The state
    209 		 * tracker has already mapped them to attributes via
    210 		 * variable->data.driver_location.
    211 		 */
    212 		if (nir->info.stage == MESA_SHADER_VERTEX)
    213 			continue;
    214 
    215 		assert(nir->info.stage != MESA_SHADER_FRAGMENT ||
    216 		       (attrib_count == 1 && "not implemented"));
    217 
    218 		/* Fragment shader position is a system value. */
    219 		if (nir->info.stage == MESA_SHADER_FRAGMENT &&
    220 		    variable->data.location == VARYING_SLOT_POS) {
    221 			if (variable->data.pixel_center_integer)
    222 				info->properties[TGSI_PROPERTY_FS_COORD_PIXEL_CENTER] =
    223 					TGSI_FS_COORD_PIXEL_CENTER_INTEGER;
    224 
    225 			num_inputs++;
    226 			continue;
    227 		}
    228 
    229 		i = variable->data.driver_location;
    230 		if (processed_inputs & ((uint64_t)1 << i))
    231 			continue;
    232 
    233 		processed_inputs |= ((uint64_t)1 << i);
    234 		num_inputs++;
    235 
    236 		tgsi_get_gl_varying_semantic(variable->data.location, true,
    237 					     &semantic_name, &semantic_index);
    238 
    239 		info->input_semantic_name[i] = semantic_name;
    240 		info->input_semantic_index[i] = semantic_index;
    241 
    242 		if (semantic_name == TGSI_SEMANTIC_PRIMID)
    243 			info->uses_primid = true;
    244 
    245 		if (variable->data.sample)
    246 			info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
    247 		else if (variable->data.centroid)
    248 			info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
    249 		else
    250 			info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;
    251 
    252 		enum glsl_base_type base_type =
    253 			glsl_get_base_type(glsl_without_array(variable->type));
    254 
    255 		switch (variable->data.interpolation) {
    256 		case INTERP_MODE_NONE:
    257 			if (glsl_base_type_is_integer(base_type)) {
    258 				info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
    259 				break;
    260 			}
    261 
    262 			if (semantic_name == TGSI_SEMANTIC_COLOR) {
    263 				info->input_interpolate[i] = TGSI_INTERPOLATE_COLOR;
    264 				goto persp_locations;
    265 			}
    266 			/* fall-through */
    267 		case INTERP_MODE_SMOOTH:
    268 			assert(!glsl_base_type_is_integer(base_type));
    269 
    270 			info->input_interpolate[i] = TGSI_INTERPOLATE_PERSPECTIVE;
    271 
    272 		persp_locations:
    273 			if (variable->data.sample)
    274 				info->uses_persp_sample = true;
    275 			else if (variable->data.centroid)
    276 				info->uses_persp_centroid = true;
    277 			else
    278 				info->uses_persp_center = true;
    279 			break;
    280 
    281 		case INTERP_MODE_NOPERSPECTIVE:
    282 			assert(!glsl_base_type_is_integer(base_type));
    283 
    284 			info->input_interpolate[i] = TGSI_INTERPOLATE_LINEAR;
    285 
    286 			if (variable->data.sample)
    287 				info->uses_linear_sample = true;
    288 			else if (variable->data.centroid)
    289 				info->uses_linear_centroid = true;
    290 			else
    291 				info->uses_linear_center = true;
    292 			break;
    293 
    294 		case INTERP_MODE_FLAT:
    295 			info->input_interpolate[i] = TGSI_INTERPOLATE_CONSTANT;
    296 			break;
    297 		}
    298 
    299 		/* TODO make this more precise */
    300 		if (variable->data.location == VARYING_SLOT_COL0)
    301 			info->colors_read |= 0x0f;
    302 		else if (variable->data.location == VARYING_SLOT_COL1)
    303 			info->colors_read |= 0xf0;
    304 	}
    305 
    306 	if (nir->info.stage != MESA_SHADER_VERTEX)
    307 		info->num_inputs = num_inputs;
    308 	else
    309 		info->num_inputs = nir->num_inputs;
    310 
    311 	i = 0;
    312 	uint64_t processed_outputs = 0;
    313 	unsigned num_outputs = 0;
    314 	nir_foreach_variable(variable, &nir->outputs) {
    315 		unsigned semantic_name, semantic_index;
    316 
    317 		if (nir->info.stage == MESA_SHADER_FRAGMENT) {
    318 			tgsi_get_gl_frag_result_semantic(variable->data.location,
    319 				&semantic_name, &semantic_index);
    320 
    321 			/* Adjust for dual source blending */
    322 			if (variable->data.index > 0) {
    323 				semantic_index++;
    324 			}
    325 		} else {
    326 			tgsi_get_gl_varying_semantic(variable->data.location, true,
    327 						     &semantic_name, &semantic_index);
    328 		}
    329 
    330 		i = variable->data.driver_location;
    331 		if (processed_outputs & ((uint64_t)1 << i))
    332 			continue;
    333 
    334 		processed_outputs |= ((uint64_t)1 << i);
    335 		num_outputs++;
    336 
    337 		info->output_semantic_name[i] = semantic_name;
    338 		info->output_semantic_index[i] = semantic_index;
    339 		info->output_usagemask[i] = TGSI_WRITEMASK_XYZW;
    340 
    341 		unsigned num_components = 4;
    342 		unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(variable->type));
    343 		if (vector_elements)
    344 			num_components = vector_elements;
    345 
    346 		unsigned gs_out_streams;
    347 		if (variable->data.stream & (1u << 31)) {
    348 			gs_out_streams = variable->data.stream & ~(1u << 31);
    349 		} else {
    350 			assert(variable->data.stream < 4);
    351 			gs_out_streams = 0;
    352 			for (unsigned j = 0; j < num_components; ++j)
    353 				gs_out_streams |= variable->data.stream << (2 * (variable->data.location_frac + j));
    354 		}
    355 
    356 		unsigned streamx = gs_out_streams & 3;
    357 		unsigned streamy = (gs_out_streams >> 2) & 3;
    358 		unsigned streamz = (gs_out_streams >> 4) & 3;
    359 		unsigned streamw = (gs_out_streams >> 6) & 3;
    360 
    361 		if (info->output_usagemask[i] & TGSI_WRITEMASK_X) {
    362 			info->output_streams[i] |= streamx;
    363 			info->num_stream_output_components[streamx]++;
    364 		}
    365 		if (info->output_usagemask[i] & TGSI_WRITEMASK_Y) {
    366 			info->output_streams[i] |= streamy << 2;
    367 			info->num_stream_output_components[streamy]++;
    368 		}
    369 		if (info->output_usagemask[i] & TGSI_WRITEMASK_Z) {
    370 			info->output_streams[i] |= streamz << 4;
    371 			info->num_stream_output_components[streamz]++;
    372 		}
    373 		if (info->output_usagemask[i] & TGSI_WRITEMASK_W) {
    374 			info->output_streams[i] |= streamw << 6;
    375 			info->num_stream_output_components[streamw]++;
    376 		}
    377 
    378 		switch (semantic_name) {
    379 		case TGSI_SEMANTIC_PRIMID:
    380 			info->writes_primid = true;
    381 			break;
    382 		case TGSI_SEMANTIC_VIEWPORT_INDEX:
    383 			info->writes_viewport_index = true;
    384 			break;
    385 		case TGSI_SEMANTIC_LAYER:
    386 			info->writes_layer = true;
    387 			break;
    388 		case TGSI_SEMANTIC_PSIZE:
    389 			info->writes_psize = true;
    390 			break;
    391 		case TGSI_SEMANTIC_CLIPVERTEX:
    392 			info->writes_clipvertex = true;
    393 			break;
    394 		case TGSI_SEMANTIC_COLOR:
    395 			info->colors_written |= 1 << semantic_index;
    396 			break;
    397 		case TGSI_SEMANTIC_STENCIL:
    398 			info->writes_stencil = true;
    399 			break;
    400 		case TGSI_SEMANTIC_SAMPLEMASK:
    401 			info->writes_samplemask = true;
    402 			break;
    403 		case TGSI_SEMANTIC_EDGEFLAG:
    404 			info->writes_edgeflag = true;
    405 			break;
    406 		case TGSI_SEMANTIC_POSITION:
    407 			if (info->processor == PIPE_SHADER_FRAGMENT)
    408 				info->writes_z = true;
    409 			else
    410 				info->writes_position = true;
    411 			break;
    412 		}
    413 
    414 		if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
    415 			switch (semantic_name) {
    416 			case TGSI_SEMANTIC_PATCH:
    417 				info->reads_perpatch_outputs = true;
    418 			break;
    419 			case TGSI_SEMANTIC_TESSINNER:
    420 			case TGSI_SEMANTIC_TESSOUTER:
    421 				info->reads_tessfactor_outputs = true;
    422 			break;
    423 			default:
    424 				info->reads_pervertex_outputs = true;
    425 			}
    426 		}
    427 	}
    428 
    429 	info->num_outputs = num_outputs;
    430 
    431 	nir_foreach_variable(variable, &nir->uniforms) {
    432 		const struct glsl_type *type = variable->type;
    433 		enum glsl_base_type base_type =
    434 			glsl_get_base_type(glsl_without_array(type));
    435 		unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
    436 
    437 		/* We rely on the fact that nir_lower_samplers_as_deref has
    438 		 * eliminated struct dereferences.
    439 		 */
    440 		if (base_type == GLSL_TYPE_SAMPLER)
    441 			info->samplers_declared |=
    442 				u_bit_consecutive(variable->data.binding, aoa_size);
    443 		else if (base_type == GLSL_TYPE_IMAGE)
    444 			info->images_declared |=
    445 				u_bit_consecutive(variable->data.binding, aoa_size);
    446 	}
    447 
    448 	info->num_written_clipdistance = nir->info.clip_distance_array_size;
    449 	info->num_written_culldistance = nir->info.cull_distance_array_size;
    450 	info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
    451 	info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);
    452 
    453 	if (info->processor == PIPE_SHADER_FRAGMENT)
    454 		info->uses_kill = nir->info.fs.uses_discard;
    455 
    456 	/* TODO make this more accurate */
    457 	info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
    458 	info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
    459 
    460 	func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
    461 	nir_foreach_block(block, func->impl) {
    462 		nir_foreach_instr(instr, block)
    463 			scan_instruction(info, instr);
    464 	}
    465 }
    466 
    467 /**
    468  * Perform "lowering" operations on the NIR that are run once when the shader
    469  * selector is created.
    470  */
    471 void
    472 si_lower_nir(struct si_shader_selector* sel)
    473 {
    474 	/* Adjust the driver location of inputs and outputs. The state tracker
    475 	 * interprets them as slots, while the ac/nir backend interprets them
    476 	 * as individual components.
    477 	 */
    478 	nir_foreach_variable(variable, &sel->nir->inputs)
    479 		variable->data.driver_location *= 4;
    480 
    481 	nir_foreach_variable(variable, &sel->nir->outputs) {
    482 		variable->data.driver_location *= 4;
    483 
    484 		if (sel->nir->info.stage == MESA_SHADER_FRAGMENT) {
    485 			if (variable->data.location == FRAG_RESULT_DEPTH)
    486 				variable->data.driver_location += 2;
    487 			else if (variable->data.location == FRAG_RESULT_STENCIL)
    488 				variable->data.driver_location += 1;
    489 		}
    490 	}
    491 
    492 	/* Perform lowerings (and optimizations) of code.
    493 	 *
    494 	 * Performance considerations aside, we must:
    495 	 * - lower certain ALU operations
    496 	 * - ensure constant offsets for texture instructions are folded
    497 	 *   and copy-propagated
    498 	 */
    499 	NIR_PASS_V(sel->nir, nir_lower_io, nir_var_uniform, type_size,
    500 		   (nir_lower_io_options)0);
    501 	NIR_PASS_V(sel->nir, nir_lower_uniforms_to_ubo);
    502 
    503 	NIR_PASS_V(sel->nir, nir_lower_returns);
    504 	NIR_PASS_V(sel->nir, nir_lower_vars_to_ssa);
    505 	NIR_PASS_V(sel->nir, nir_lower_alu_to_scalar);
    506 	NIR_PASS_V(sel->nir, nir_lower_phis_to_scalar);
    507 
    508 	static const struct nir_lower_tex_options lower_tex_options = {
    509 		.lower_txp = ~0u,
    510 	};
    511 	NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
    512 
    513 	const nir_lower_subgroups_options subgroups_options = {
    514 		.subgroup_size = 64,
    515 		.ballot_bit_size = 32,
    516 		.lower_to_scalar = true,
    517 		.lower_subgroup_masks = true,
    518 		.lower_vote_trivial = false,
    519 	};
    520 	NIR_PASS_V(sel->nir, nir_lower_subgroups, &subgroups_options);
    521 
    522 	bool progress;
    523 	do {
    524 		progress = false;
    525 
    526 		/* (Constant) copy propagation is needed for txf with offsets. */
    527 		NIR_PASS(progress, sel->nir, nir_copy_prop);
    528 		NIR_PASS(progress, sel->nir, nir_opt_remove_phis);
    529 		NIR_PASS(progress, sel->nir, nir_opt_dce);
    530 		if (nir_opt_trivial_continues(sel->nir)) {
    531 			progress = true;
    532 			NIR_PASS(progress, sel->nir, nir_copy_prop);
    533 			NIR_PASS(progress, sel->nir, nir_opt_dce);
    534 		}
    535 		NIR_PASS(progress, sel->nir, nir_opt_if);
    536 		NIR_PASS(progress, sel->nir, nir_opt_dead_cf);
    537 		NIR_PASS(progress, sel->nir, nir_opt_cse);
    538 		NIR_PASS(progress, sel->nir, nir_opt_peephole_select, 8);
    539 
    540 		/* Needed for algebraic lowering */
    541 		NIR_PASS(progress, sel->nir, nir_opt_algebraic);
    542 		NIR_PASS(progress, sel->nir, nir_opt_constant_folding);
    543 
    544 		NIR_PASS(progress, sel->nir, nir_opt_undef);
    545 		NIR_PASS(progress, sel->nir, nir_opt_conditional_discard);
    546 		if (sel->nir->options->max_unroll_iterations) {
    547 			NIR_PASS(progress, sel->nir, nir_opt_loop_unroll, 0);
    548 		}
    549 	} while (progress);
    550 }
    551 
    552 static void declare_nir_input_vs(struct si_shader_context *ctx,
    553 				 struct nir_variable *variable,
    554 				 LLVMValueRef out[4])
    555 {
    556 	si_llvm_load_input_vs(ctx, variable->data.driver_location / 4, out);
    557 }
    558 
    559 static void declare_nir_input_fs(struct si_shader_context *ctx,
    560 				 struct nir_variable *variable,
    561 				 unsigned input_index,
    562 				 LLVMValueRef out[4])
    563 {
    564 	unsigned slot = variable->data.location;
    565 	if (slot == VARYING_SLOT_POS) {
    566 		out[0] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_X_FLOAT);
    567 		out[1] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Y_FLOAT);
    568 		out[2] = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_Z_FLOAT);
    569 		out[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
    570 				LLVMGetParam(ctx->main_fn, SI_PARAM_POS_W_FLOAT));
    571 		return;
    572 	}
    573 
    574 	si_llvm_load_input_fs(ctx, input_index, out);
    575 }
    576 
    577 LLVMValueRef si_nir_load_input_gs(struct ac_shader_abi *abi,
    578 				  unsigned location,
    579 				  unsigned driver_location,
    580 				  unsigned component,
    581 				  unsigned num_components,
    582 				  unsigned vertex_index,
    583 				  unsigned const_index,
    584 				  LLVMTypeRef type)
    585 {
    586 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
    587 
    588 	LLVMValueRef value[4];
    589 	for (unsigned i = component; i < num_components + component; i++) {
    590 		value[i] = si_llvm_load_input_gs(&ctx->abi, driver_location  / 4,
    591 						 vertex_index, type, i);
    592 	}
    593 
    594 	return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
    595 }
    596 
    597 static LLVMValueRef
    598 si_nir_load_sampler_desc(struct ac_shader_abi *abi,
    599 		         unsigned descriptor_set, unsigned base_index,
    600 		         unsigned constant_index, LLVMValueRef dynamic_index,
    601 		         enum ac_descriptor_type desc_type, bool image,
    602 			 bool write)
    603 {
    604 	struct si_shader_context *ctx = si_shader_context_from_abi(abi);
    605 	LLVMBuilderRef builder = ctx->ac.builder;
    606 	LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
    607 	LLVMValueRef index = dynamic_index;
    608 
    609 	assert(!descriptor_set);
    610 
    611 	if (!index)
    612 		index = ctx->ac.i32_0;
    613 
    614 	index = LLVMBuildAdd(builder, index,
    615 			     LLVMConstInt(ctx->ac.i32, base_index + constant_index, false),
    616 			     "");
    617 
    618 	if (image) {
    619 		assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER);
    620 		assert(base_index + constant_index < ctx->num_images);
    621 
    622 		if (dynamic_index)
    623 			index = si_llvm_bound_index(ctx, index, ctx->num_images);
    624 
    625 		index = LLVMBuildSub(ctx->gallivm.builder,
    626 				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
    627 				     index, "");
    628 
    629 		/* TODO: be smarter about when we use dcc_off */
    630 		return si_load_image_desc(ctx, list, index, desc_type, write);
    631 	}
    632 
    633 	assert(base_index + constant_index < ctx->num_samplers);
    634 
    635 	if (dynamic_index)
    636 		index = si_llvm_bound_index(ctx, index, ctx->num_samplers);
    637 
    638 	index = LLVMBuildAdd(ctx->gallivm.builder, index,
    639 			     LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
    640 
    641 	return si_load_sampler_desc(ctx, list, index, desc_type);
    642 }
    643 
    644 bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
    645 {
    646 	struct tgsi_shader_info *info = &ctx->shader->selector->info;
    647 
    648 	if (nir->info.stage == MESA_SHADER_VERTEX ||
    649 	    nir->info.stage == MESA_SHADER_FRAGMENT) {
    650 		uint64_t processed_inputs = 0;
    651 		nir_foreach_variable(variable, &nir->inputs) {
    652 			unsigned attrib_count = glsl_count_attribute_slots(variable->type,
    653 									   nir->info.stage == MESA_SHADER_VERTEX);
    654 			unsigned input_idx = variable->data.driver_location;
    655 
    656 			assert(attrib_count == 1);
    657 
    658 			LLVMValueRef data[4];
    659 			unsigned loc = variable->data.location;
    660 
    661 			/* Packed components share the same location so skip
    662 			 * them if we have already processed the location.
    663 			 */
    664 			if (processed_inputs & ((uint64_t)1 << loc))
    665 				continue;
    666 
    667 			if (nir->info.stage == MESA_SHADER_VERTEX)
    668 				declare_nir_input_vs(ctx, variable, data);
    669 			else if (nir->info.stage == MESA_SHADER_FRAGMENT)
    670 				declare_nir_input_fs(ctx, variable, input_idx / 4, data);
    671 
    672 			for (unsigned chan = 0; chan < 4; chan++) {
    673 				ctx->inputs[input_idx + chan] =
    674 					LLVMBuildBitCast(ctx->ac.builder, data[chan], ctx->ac.i32, "");
    675 			}
    676 			processed_inputs |= ((uint64_t)1 << loc);
    677 		}
    678 	}
    679 
    680 	ctx->abi.inputs = &ctx->inputs[0];
    681 	ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
    682 	ctx->abi.clamp_shadow_reference = true;
    683 
    684 	ctx->num_samplers = util_last_bit(info->samplers_declared);
    685 	ctx->num_images = util_last_bit(info->images_declared);
    686 
    687 	ac_nir_translate(&ctx->ac, &ctx->abi, nir, NULL);
    688 
    689 	return true;
    690 }
    691