      1 /*
      2  * Copyright © 2016 Bas Nieuwenhuizen
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "ac_nir_to_llvm.h"
     25 #include "ac_llvm_util.h"
     26 #include "ac_binary.h"
     27 #include "sid.h"
     28 #include "nir/nir.h"
     29 #include "../vulkan/radv_descriptor_set.h"
     30 #include "util/bitscan.h"
     31 #include <llvm-c/Transforms/Scalar.h>
     32 
     33 enum radeon_llvm_calling_convention {
     34 	RADEON_LLVM_AMDGPU_VS = 87,
     35 	RADEON_LLVM_AMDGPU_GS = 88,
     36 	RADEON_LLVM_AMDGPU_PS = 89,
     37 	RADEON_LLVM_AMDGPU_CS = 90,
     38 };
     39 
     40 #define CONST_ADDR_SPACE 2
     41 #define LOCAL_ADDR_SPACE 3
     42 
     43 #define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
     44 #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
     45 
     46 enum desc_type {
     47 	DESC_IMAGE,
     48 	DESC_FMASK,
     49 	DESC_SAMPLER,
     50 	DESC_BUFFER,
     51 };
     52 
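        /* Per-shader translation state: the LLVM context/module/builder, the NIR
         * def/phi lookup tables, the function arguments that form the shader ABI,
         * and commonly used LLVM types and constants. */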
     53 struct nir_to_llvm_context {
     54 	struct ac_llvm_context ac;
     55 	const struct ac_nir_compiler_options *options;
     56 	struct ac_shader_variant_info *shader_info;
     57 
     58 	LLVMContextRef context;
     59 	LLVMModuleRef module;
     60 	LLVMBuilderRef builder;
     61 	LLVMValueRef main_function;
     62 
     63 	struct hash_table *defs;
     64 	struct hash_table *phis;
     65 
     66 	LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
     67 	LLVMValueRef push_constants;
     68 	LLVMValueRef num_work_groups;
     69 	LLVMValueRef workgroup_ids;
     70 	LLVMValueRef local_invocation_ids;
     71 	LLVMValueRef tg_size;
     72 
     73 	LLVMValueRef vertex_buffers;
     74 	LLVMValueRef base_vertex;
     75 	LLVMValueRef start_instance;
     76 	LLVMValueRef vertex_id;
     77 	LLVMValueRef rel_auto_id;
     78 	LLVMValueRef vs_prim_id;
     79 	LLVMValueRef instance_id;
     80 
     81 	LLVMValueRef prim_mask;
     82 	LLVMValueRef sample_positions;
     83 	LLVMValueRef persp_sample, persp_center, persp_centroid;
     84 	LLVMValueRef linear_sample, linear_center, linear_centroid;
     85 	LLVMValueRef front_face;
     86 	LLVMValueRef ancillary;
     87 	LLVMValueRef frag_pos[4];
     88 
     89 	LLVMBasicBlockRef continue_block;
     90 	LLVMBasicBlockRef break_block;
     91 
     92 	LLVMTypeRef i1;
     93 	LLVMTypeRef i8;
     94 	LLVMTypeRef i16;
     95 	LLVMTypeRef i32;
     96 	LLVMTypeRef i64;
     97 	LLVMTypeRef v2i32;
     98 	LLVMTypeRef v3i32;
     99 	LLVMTypeRef v4i32;
    100 	LLVMTypeRef v8i32;
    101 	LLVMTypeRef f32;
    102 	LLVMTypeRef f16;
    103 	LLVMTypeRef v2f32;
    104 	LLVMTypeRef v4f32;
    105 	LLVMTypeRef v16i8;
    106 	LLVMTypeRef voidt;
    107 
    108 	LLVMValueRef i32zero;
    109 	LLVMValueRef i32one;
    110 	LLVMValueRef f32zero;
    111 	LLVMValueRef f32one;
    112 	LLVMValueRef v4f32empty;
    113 
    114 	unsigned range_md_kind;
    115 	unsigned uniform_md_kind;
    116 	unsigned invariant_load_md_kind;
    117 	LLVMValueRef empty_md;
    118 	gl_shader_stage stage;
    119 
    120 	LLVMValueRef lds;
    121 	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
    122 	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
    123 
    124 	LLVMValueRef shared_memory;
    125 	uint64_t input_mask;
    126 	uint64_t output_mask;
    127 	int num_locals;
    128 	LLVMValueRef *locals;
    129 	bool has_ddxy;
    130 	unsigned num_clips;
    131 	unsigned num_culls;
    132 
    133 	bool has_ds_bpermute;
    134 };
    135 
    136 struct ac_tex_info {
    137 	LLVMValueRef args[12];
    138 	int arg_count;
    139 	LLVMTypeRef dst_type;
    140 	bool has_offset;
    141 };
    142 
    143 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
    144 				     nir_deref_var *deref,
    145 				     enum desc_type desc_type);
    146 static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
    147 {
    148 	return (index * 4) + chan;
    149 }
    150 
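        /* Size in bytes of an LLVM integer, float, pointer or vector type. */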
    151 static unsigned llvm_get_type_size(LLVMTypeRef type)
    152 {
    153 	LLVMTypeKind kind = LLVMGetTypeKind(type);
    154 
    155 	switch (kind) {
    156 	case LLVMIntegerTypeKind:
    157 		return LLVMGetIntTypeWidth(type) / 8;
    158 	case LLVMFloatTypeKind:
    159 		return 4;
    160 	case LLVMPointerTypeKind:
    161 		return 8;
    162 	case LLVMVectorTypeKind:
    163 		return LLVMGetVectorSize(type) *
    164 		       llvm_get_type_size(LLVMGetElementType(type));
    165 	default:
    166 		assert(0);
    167 		return 0;
    168 	}
    169 }
    170 
    171 static void set_llvm_calling_convention(LLVMValueRef func,
    172                                         gl_shader_stage stage)
    173 {
    174 	enum radeon_llvm_calling_convention calling_conv;
    175 
    176 	switch (stage) {
    177 	case MESA_SHADER_VERTEX:
    178 	case MESA_SHADER_TESS_CTRL:
    179 	case MESA_SHADER_TESS_EVAL:
    180 		calling_conv = RADEON_LLVM_AMDGPU_VS;
    181 		break;
    182 	case MESA_SHADER_GEOMETRY:
    183 		calling_conv = RADEON_LLVM_AMDGPU_GS;
    184 		break;
    185 	case MESA_SHADER_FRAGMENT:
    186 		calling_conv = RADEON_LLVM_AMDGPU_PS;
    187 		break;
    188 	case MESA_SHADER_COMPUTE:
    189 		calling_conv = RADEON_LLVM_AMDGPU_CS;
    190 		break;
    191 	default:
    192 		unreachable("Unhandled shader type");
    193 	}
    194 
    195 	LLVMSetFunctionCallConv(func, calling_conv);
    196 }
    197 
    198 static LLVMValueRef
    199 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
    200                      LLVMBuilderRef builder, LLVMTypeRef *return_types,
    201                      unsigned num_return_elems, LLVMTypeRef *param_types,
    202                      unsigned param_count, unsigned array_params_mask,
    203                      unsigned sgpr_params, bool unsafe_math)
    204 {
    205 	LLVMTypeRef main_function_type, ret_type;
    206 	LLVMBasicBlockRef main_function_body;
    207 
    208 	if (num_return_elems)
    209 		ret_type = LLVMStructTypeInContext(ctx, return_types,
    210 		                                   num_return_elems, true);
    211 	else
    212 		ret_type = LLVMVoidTypeInContext(ctx);
    213 
    214 	/* Setup the function */
    215 	main_function_type =
    216 	    LLVMFunctionType(ret_type, param_types, param_count, 0);
    217 	LLVMValueRef main_function =
    218 	    LLVMAddFunction(module, "main", main_function_type);
    219 	main_function_body =
    220 	    LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
    221 	LLVMPositionBuilderAtEnd(builder, main_function_body);
    222 
    223 	LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
    224 	for (unsigned i = 0; i < sgpr_params; ++i) {
    225 		if (array_params_mask & (1 << i)) {
    226 			LLVMValueRef P = LLVMGetParam(main_function, i);
    227 			ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL);
    228 			ac_add_attr_dereferenceable(P, UINT64_MAX);
    229 		}
    230 		else {
    231 			ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG);
    232 		}
    233 	}
    234 
    235 	if (unsafe_math) {
    236 		/* These were copied from some LLVM test. */
    237 		LLVMAddTargetDependentFunctionAttr(main_function,
    238 						   "less-precise-fpmad",
    239 						   "true");
    240 		LLVMAddTargetDependentFunctionAttr(main_function,
    241 						   "no-infs-fp-math",
    242 						   "true");
    243 		LLVMAddTargetDependentFunctionAttr(main_function,
    244 						   "no-nans-fp-math",
    245 						   "true");
    246 		LLVMAddTargetDependentFunctionAttr(main_function,
    247 						   "unsafe-fp-math",
    248 						   "true");
    249 	}
    250 	return main_function;
    251 }
    252 
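        /* Pointer type to an array of num_elements elem_type values in the
         * constant address space. */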
    253 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
    254 {
    255 	return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
    256 	                       CONST_ADDR_SPACE);
    257 }
    258 
    259 static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
    260 					  int idx,
    261 					  LLVMTypeRef type)
    262 {
    263 	LLVMValueRef offset;
    264 	LLVMValueRef ptr;
    265 	int addr_space;
    266 
    267 	offset = LLVMConstInt(ctx->i32, idx * 16, false);
    268 
    269 	ptr = ctx->shared_memory;
    270 	ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
    271 	addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
    272 	ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
    273 	return ptr;
    274 }
    275 
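        /* Bitcast a float value (scalar or vector) to the matching integer type;
         * anything else is returned unchanged. to_float() below is the inverse. */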
    276 static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v)
    277 {
    278 	LLVMTypeRef type = LLVMTypeOf(v);
    279 	if (type == ctx->f32) {
    280 		return LLVMBuildBitCast(ctx->builder, v, ctx->i32, "");
    281 	} else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
    282 		LLVMTypeRef elem_type = LLVMGetElementType(type);
    283 		if (elem_type == ctx->f32) {
    284 			LLVMTypeRef nt = LLVMVectorType(ctx->i32, LLVMGetVectorSize(type));
    285 			return LLVMBuildBitCast(ctx->builder, v, nt, "");
    286 		}
    287 	}
    288 	return v;
    289 }
    290 
    291 static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
    292 {
    293 	LLVMTypeRef type = LLVMTypeOf(v);
    294 	if (type == ctx->i32) {
    295 		return LLVMBuildBitCast(ctx->builder, v, ctx->f32, "");
    296 	} else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
    297 		LLVMTypeRef elem_type = LLVMGetElementType(type);
    298 		if (elem_type == ctx->i32) {
    299 			LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
    300 			return LLVMBuildBitCast(ctx->builder, v, nt, "");
    301 		}
    302 	}
    303 	return v;
    304 }
    305 
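        /* Extract a bitfield from a 32-bit parameter:
         * (param >> rshift) & ((1 << bitwidth) - 1),
         * e.g. unpack_param(ctx, param, 8, 4) yields bits 8..11. */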
    306 static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
    307 				 LLVMValueRef param, unsigned rshift,
    308 				 unsigned bitwidth)
    309 {
    310 	LLVMValueRef value = param;
    311 	if (rshift)
    312 		value = LLVMBuildLShr(ctx->builder, value,
    313 				      LLVMConstInt(ctx->i32, rshift, false), "");
    314 
    315 	if (rshift + bitwidth < 32) {
    316 		unsigned mask = (1 << bitwidth) - 1;
    317 		value = LLVMBuildAnd(ctx->builder, value,
    318 				     LLVMConstInt(ctx->i32, mask, false), "");
    319 	}
    320 	return value;
    321 }
    322 
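        /* GEP with a leading zero index, i.e. a pointer to base_ptr[0][index]. */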
    323 static LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx,
    324 			       LLVMValueRef base_ptr, LLVMValueRef index)
    325 {
    326 	LLVMValueRef indices[2] = {
    327 		ctx->i32zero,
    328 		index,
    329 	};
    330 	return LLVMBuildGEP(ctx->builder, base_ptr,
    331 			    indices, 2, "");
    332 }
    333 
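        /* Load base_ptr[0][index]; when uniform is set, the address is tagged
         * amdgpu.uniform so the backend can use scalar loads. The _const variant
         * below additionally marks the load as invariant. */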
    334 static LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx,
    335 				       LLVMValueRef base_ptr, LLVMValueRef index,
    336 				       bool uniform)
    337 {
    338 	LLVMValueRef pointer;
    339 	pointer = build_gep0(ctx, base_ptr, index);
    340 	if (uniform)
    341 		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
    342 	return LLVMBuildLoad(ctx->builder, pointer, "");
    343 }
    344 
    345 static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx,
    346 					     LLVMValueRef base_ptr, LLVMValueRef index)
    347 {
    348 	LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
    349 	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
    350 	return result;
    351 }
    352 
    353 static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs)
    354 {
    355 	ud_info->sgpr_idx = sgpr_idx;
    356 	ud_info->num_sgprs = num_sgprs;
    357 	ud_info->indirect = false;
    358 	ud_info->indirect_offset = 0;
    359 }
    360 
    361 static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
    362 					 int idx, uint8_t sgpr_idx, uint8_t num_sgprs)
    363 {
    364 	set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
    365 }
    366 
    367 #if 0
    368 static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
    369 					   uint32_t indirect_offset)
    370 {
    371 	ud_info->sgpr_idx = sgpr_idx;
    372 	ud_info->num_sgprs = num_sgprs;
    373 	ud_info->indirect = true;
    374 	ud_info->indirect_offset = indirect_offset;
    375 }
    376 #endif
    377 
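        /* Declare the shader entry point and its argument list: one pointer per
         * descriptor set used by this stage, an optional push-constant pointer and
         * the per-stage SGPR/VGPR system values. SGPR arguments are marked
         * inreg/byval, input register counts are accumulated, and the user SGPR
         * locations are recorded in shader_info. */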
    378 static void create_function(struct nir_to_llvm_context *ctx)
    379 {
    380 	LLVMTypeRef arg_types[23];
    381 	unsigned arg_idx = 0;
    382 	unsigned array_params_mask = 0;
    383 	unsigned sgpr_count = 0, user_sgpr_count;
    384 	unsigned i;
    385 	unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
    386 	unsigned user_sgpr_idx;
    387 	bool need_push_constants;
    388 
    389 	need_push_constants = true;
    390 	if (!ctx->options->layout)
    391 		need_push_constants = false;
    392 	else if (!ctx->options->layout->push_constant_size &&
    393 		 !ctx->options->layout->dynamic_offset_count)
    394 		need_push_constants = false;
    395 
    396 	/* 1 for each descriptor set */
    397 	for (unsigned i = 0; i < num_sets; ++i) {
    398 		if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
    399 			array_params_mask |= (1 << arg_idx);
    400 			arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
    401 		}
    402 	}
    403 
    404 	if (need_push_constants) {
    405 		/* 1 for push constants and dynamic descriptors */
    406 		array_params_mask |= (1 << arg_idx);
    407 		arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
    408 	}
    409 
    410 	switch (ctx->stage) {
    411 	case MESA_SHADER_COMPUTE:
    412 		arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
    413 		user_sgpr_count = arg_idx;
    414 		arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
    415 		arg_types[arg_idx++] = ctx->i32;
    416 		sgpr_count = arg_idx;
    417 
    418 		arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
    419 		break;
    420 	case MESA_SHADER_VERTEX:
    421 		arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */
    422 		arg_types[arg_idx++] = ctx->i32; // base vertex
    423 		arg_types[arg_idx++] = ctx->i32; // start instance
    424 		user_sgpr_count = sgpr_count = arg_idx;
    425 		arg_types[arg_idx++] = ctx->i32; // vertex id
    426 		arg_types[arg_idx++] = ctx->i32; // rel auto id
    427 		arg_types[arg_idx++] = ctx->i32; // vs prim id
    428 		arg_types[arg_idx++] = ctx->i32; // instance id
    429 		break;
    430 	case MESA_SHADER_FRAGMENT:
    431 		arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
    432 		user_sgpr_count = arg_idx;
    433 		arg_types[arg_idx++] = ctx->i32; /* prim mask */
    434 		sgpr_count = arg_idx;
    435 		arg_types[arg_idx++] = ctx->v2i32; /* persp sample */
    436 		arg_types[arg_idx++] = ctx->v2i32; /* persp center */
    437 		arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */
    438 		arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */
    439 		arg_types[arg_idx++] = ctx->v2i32; /* linear sample */
    440 		arg_types[arg_idx++] = ctx->v2i32; /* linear center */
    441 		arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */
    442 		arg_types[arg_idx++] = ctx->f32;  /* line stipple tex */
    443 		arg_types[arg_idx++] = ctx->f32;  /* pos x float */
    444 		arg_types[arg_idx++] = ctx->f32;  /* pos y float */
    445 		arg_types[arg_idx++] = ctx->f32;  /* pos z float */
    446 		arg_types[arg_idx++] = ctx->f32;  /* pos w float */
    447 		arg_types[arg_idx++] = ctx->i32;  /* front face */
    448 		arg_types[arg_idx++] = ctx->i32;  /* ancillary */
    449 		arg_types[arg_idx++] = ctx->f32;  /* sample coverage */
    450 		arg_types[arg_idx++] = ctx->i32;  /* fixed pt */
    451 		break;
    452 	default:
    453 		unreachable("Shader stage not implemented");
    454 	}
    455 
    456 	ctx->main_function = create_llvm_function(
    457 	    ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
    458 	    arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
    459 	set_llvm_calling_convention(ctx->main_function, ctx->stage);
    460 
    461 
    462 	ctx->shader_info->num_input_sgprs = 0;
    463 	ctx->shader_info->num_input_vgprs = 0;
    464 
    465 	for (i = 0; i < user_sgpr_count; i++)
    466 		ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
    467 
    468 	ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs;
    469 	for (; i < sgpr_count; i++)
    470 		ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4;
    471 
    472 	if (ctx->stage != MESA_SHADER_FRAGMENT)
    473 		for (; i < arg_idx; ++i)
    474 			ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;
    475 
    476 	arg_idx = 0;
    477 	user_sgpr_idx = 0;
    478 	for (unsigned i = 0; i < num_sets; ++i) {
    479 		if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
    480 			set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
    481 			user_sgpr_idx += 2;
    482 			ctx->descriptor_sets[i] =
    483 				LLVMGetParam(ctx->main_function, arg_idx++);
    484 		} else
    485 			ctx->descriptor_sets[i] = NULL;
    486 	}
    487 
    488 	if (need_push_constants) {
    489 		ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
    490 		set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
    491 		user_sgpr_idx += 2;
    492 	}
    493 
    494 	switch (ctx->stage) {
    495 	case MESA_SHADER_COMPUTE:
    496 		set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
    497 		user_sgpr_idx += 3;
    498 		ctx->num_work_groups =
    499 		    LLVMGetParam(ctx->main_function, arg_idx++);
    500 		ctx->workgroup_ids =
    501 		    LLVMGetParam(ctx->main_function, arg_idx++);
    502 		ctx->tg_size =
    503 		    LLVMGetParam(ctx->main_function, arg_idx++);
    504 		ctx->local_invocation_ids =
    505 		    LLVMGetParam(ctx->main_function, arg_idx++);
    506 		break;
    507 	case MESA_SHADER_VERTEX:
    508 		set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
    509 		user_sgpr_idx += 2;
    510 		ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
    511 		set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 2);
    512 		user_sgpr_idx += 2;
    513 		ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
    514 		ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
    515 		ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
    516 		ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++);
    517 		ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
    518 		ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
    519 		break;
    520 	case MESA_SHADER_FRAGMENT:
    521 		set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2);
    522 		user_sgpr_idx += 2;
    523 		ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
    524 		ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
    525 		ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
    526 		ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
    527 		ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
    528 		arg_idx++; /* persp pull model */
    529 		ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++);
    530 		ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++);
    531 		ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
    532 		arg_idx++; /* line stipple */
    533 		ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++);
    534 		ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++);
    535 		ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++);
    536 		ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++);
    537 		ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++);
    538 		ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++);
    539 		break;
    540 	default:
    541 		unreachable("Shader stage not implemented");
    542 	}
    543 }
    544 
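        /* Create the commonly used LLVM types and constants, plus the metadata
         * kinds for the range, invariant.load and amdgpu.uniform annotations. */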
    545 static void setup_types(struct nir_to_llvm_context *ctx)
    546 {
    547 	LLVMValueRef args[4];
    548 
    549 	ctx->voidt = LLVMVoidTypeInContext(ctx->context);
    550 	ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
    551 	ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
    552 	ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
    553 	ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
    554 	ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
    555 	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
    556 	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
    557 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
    558 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
    559 	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
    560 	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
    561 	ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
    562 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
    563 	ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
    564 
    565 	ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
    566 	ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
    567 	ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
    568 	ctx->f32one = LLVMConstReal(ctx->f32, 1.0);
    569 
    570 	args[0] = ctx->f32zero;
    571 	args[1] = ctx->f32zero;
    572 	args[2] = ctx->f32zero;
    573 	args[3] = ctx->f32one;
    574 	ctx->v4f32empty = LLVMConstVector(args, 4);
    575 
    576 	ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
    577 						      "range", 5);
    578 	ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
    579 							       "invariant.load", 14);
    580 	ctx->uniform_md_kind =
    581 	    LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
    582 	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
    583 
    584 	args[0] = LLVMConstReal(ctx->f32, 2.5);
    585 }
    586 
    587 static int get_llvm_num_components(LLVMValueRef value)
    588 {
    589 	LLVMTypeRef type = LLVMTypeOf(value);
    590 	unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
    591 	                              ? LLVMGetVectorSize(type)
    592 	                              : 1;
    593 	return num_components;
    594 }
    595 
    596 static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
    597 				      LLVMValueRef value,
    598 				      int index)
    599 {
    600 	int count = get_llvm_num_components(value);
    601 
    602 	assert(index < count);
    603 	if (count == 1)
    604 		return value;
    605 
    606 	return LLVMBuildExtractElement(ctx->builder, value,
    607 				       LLVMConstInt(ctx->i32, index, false), "");
    608 }
    609 
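        /* Return the first count components of value, e.g. trim_vector(ctx, v, 2)
         * yields <v.x, v.y>; a count of 1 yields a scalar. */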
    610 static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
    611                                 LLVMValueRef value, unsigned count)
    612 {
    613 	unsigned num_components = get_llvm_num_components(value);
    614 	if (count == num_components)
    615 		return value;
    616 
    617 	LLVMValueRef masks[] = {
    618 	    LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
    619 	    LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
    620 
    621 	if (count == 1)
    622 		return LLVMBuildExtractElement(ctx->builder, value, masks[0],
    623 		                               "");
    624 
    625 	LLVMValueRef swizzle = LLVMConstVector(masks, count);
    626 	return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
    627 }
    628 
    629 static void
    630 build_store_values_extended(struct nir_to_llvm_context *ctx,
    631 			     LLVMValueRef *values,
    632 			     unsigned value_count,
    633 			     unsigned value_stride,
    634 			     LLVMValueRef vec)
    635 {
    636 	LLVMBuilderRef builder = ctx->builder;
    637 	unsigned i;
    638 
    639 	if (value_count == 1) {
    640 		LLVMBuildStore(builder, vec, values[0]);
    641 		return;
    642 	}
    643 
    644 	for (i = 0; i < value_count; i++) {
    645 		LLVMValueRef ptr = values[i * value_stride];
    646 		LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
    647 		LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
    648 		LLVMBuildStore(builder, value, ptr);
    649 	}
    650 }
    651 
    652 static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
    653                                 nir_ssa_def *def)
    654 {
    655 	LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
    656 	if (def->num_components > 1) {
    657 		type = LLVMVectorType(type, def->num_components);
    658 	}
    659 	return type;
    660 }
    661 
    662 static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
    663 {
    664 	assert(src.is_ssa);
    665 	struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
    666 	return (LLVMValueRef)entry->data;
    667 }
    668 
    669 
    670 static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
    671                                    struct nir_block *b)
    672 {
    673 	struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
    674 	return (LLVMBasicBlockRef)entry->data;
    675 }
    676 
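        /* Fetch a NIR ALU source and apply its swizzle: extract a single
         * component, broadcast a scalar, or shuffle into the requested order. */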
    677 static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
    678                                 nir_alu_src src,
    679                                 unsigned num_components)
    680 {
    681 	LLVMValueRef value = get_src(ctx, src.src);
    682 	bool need_swizzle = false;
    683 
    684 	assert(value);
    685 	LLVMTypeRef type = LLVMTypeOf(value);
    686 	unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
    687 	                              ? LLVMGetVectorSize(type)
    688 	                              : 1;
    689 
    690 	for (unsigned i = 0; i < num_components; ++i) {
    691 		assert(src.swizzle[i] < src_components);
    692 		if (src.swizzle[i] != i)
    693 			need_swizzle = true;
    694 	}
    695 
    696 	if (need_swizzle || num_components != src_components) {
    697 		LLVMValueRef masks[] = {
    698 		    LLVMConstInt(ctx->i32, src.swizzle[0], false),
    699 		    LLVMConstInt(ctx->i32, src.swizzle[1], false),
    700 		    LLVMConstInt(ctx->i32, src.swizzle[2], false),
    701 		    LLVMConstInt(ctx->i32, src.swizzle[3], false)};
    702 
    703 		if (src_components > 1 && num_components == 1) {
    704 			value = LLVMBuildExtractElement(ctx->builder, value,
    705 			                                masks[0], "");
    706 		} else if (src_components == 1 && num_components > 1) {
    707 			LLVMValueRef values[] = {value, value, value, value};
    708 			value = ac_build_gather_values(&ctx->ac, values, num_components);
    709 		} else {
    710 			LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
    711 			value = LLVMBuildShuffleVector(ctx->builder, value, value,
    712 		                                       swizzle, "");
    713 		}
    714 	}
    715 	assert(!src.negate);
    716 	assert(!src.abs);
    717 	return value;
    718 }
    719 
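        /* Integer compare producing the 32-bit boolean encoding used here:
         * ~0 for true, 0 for false. emit_float_cmp() below is the float analogue. */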
    720 static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx,
    721                                  LLVMIntPredicate pred, LLVMValueRef src0,
    722                                  LLVMValueRef src1)
    723 {
    724 	LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
    725 	return LLVMBuildSelect(ctx->builder, result,
    726 	                       LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
    727 	                       LLVMConstInt(ctx->i32, 0, false), "");
    728 }
    729 
    730 static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx,
    731                                    LLVMRealPredicate pred, LLVMValueRef src0,
    732                                    LLVMValueRef src1)
    733 {
    734 	LLVMValueRef result;
    735 	src0 = to_float(ctx, src0);
    736 	src1 = to_float(ctx, src1);
    737 	result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
    738 	return LLVMBuildSelect(ctx->builder, result,
    739 	                       LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
    740 	                       LLVMConstInt(ctx->i32, 0, false), "");
    741 }
    742 
    743 static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx,
    744 					 const char *intrin,
    745 					 LLVMValueRef src0)
    746 {
    747 	LLVMValueRef params[] = {
    748 		to_float(ctx, src0),
    749 	};
    750 	return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE);
    751 }
    752 
    753 static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx,
    754 				       const char *intrin,
    755 				       LLVMValueRef src0, LLVMValueRef src1)
    756 {
    757 	LLVMValueRef params[] = {
    758 		to_float(ctx, src0),
    759 		to_float(ctx, src1),
    760 	};
    761 	return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 2, AC_FUNC_ATTR_READNONE);
    762 }
    763 
    764 static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx,
    765 					 const char *intrin,
    766 					 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
    767 {
    768 	LLVMValueRef params[] = {
    769 		to_float(ctx, src0),
    770 		to_float(ctx, src1),
    771 		to_float(ctx, src2),
    772 	};
    773 	return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 3, AC_FUNC_ATTR_READNONE);
    774 }
    775 
    776 static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx,
    777 			       LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
    778 {
    779 	LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
    780 				       ctx->i32zero, "");
    781 	return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
    782 }
    783 
    784 static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx,
    785 				  LLVMValueRef src0)
    786 {
    787 	LLVMValueRef params[2] = {
    788 		src0,
    789 
    790 		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
    791 		 * add special code to check for x=0. The reason is that
    792 		 * the LLVM behavior for x=0 is different from what we
    793 		 * need here.
    794 		 *
    795 		 * The hardware already implements the correct behavior.
    796 		 */
    797 		LLVMConstInt(ctx->i32, 1, false),
    798 	};
    799 	return ac_emit_llvm_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
    800 }
    801 
    802 static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx,
    803 				   LLVMValueRef src0)
    804 {
    805 	LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.flbit.i32",
    806 					       ctx->i32, &src0, 1,
    807 					       AC_FUNC_ATTR_READNONE);
    808 
    809 	/* The hardware returns the bit index counted from the MSB, but NIR
    810 	 * wants the index counted from the LSB. Convert it with "31 - msb". */
    811 	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
    812 			   msb, "");
    813 
    814 	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
    815 	LLVMValueRef cond = LLVMBuildOr(ctx->builder,
    816 					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
    817 						      src0, ctx->i32zero, ""),
    818 					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
    819 						      src0, all_ones, ""), "");
    820 
    821 	return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
    822 }
    823 
    824 static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
    825 				   LLVMValueRef src0)
    826 {
    827 	LLVMValueRef args[2] = {
    828 		src0,
    829 		ctx->i32one,
    830 	};
    831 	LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctlz.i32",
    832 					       ctx->i32, args, ARRAY_SIZE(args),
    833 					       AC_FUNC_ATTR_READNONE);
    834 
    835 	/* The hardware returns the bit index counted from the MSB, but NIR
    836 	 * wants the index counted from the LSB. Convert it with "31 - msb". */
    837 	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
    838 			   msb, "");
    839 
    840 	return LLVMBuildSelect(ctx->builder,
    841 			       LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0,
    842 					     ctx->i32zero, ""),
    843 			       LLVMConstInt(ctx->i32, -1, true), msb, "");
    844 }
    845 
    846 static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
    847 				    LLVMIntPredicate pred,
    848 				    LLVMValueRef src0, LLVMValueRef src1)
    849 {
    850 	return LLVMBuildSelect(ctx->builder,
    851 			       LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
    852 			       src0,
    853 			       src1, "");
    854 
    855 }
    856 static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx,
    857 			      LLVMValueRef src0)
    858 {
    859 	return emit_minmax_int(ctx, LLVMIntSGT, src0,
    860 			       LLVMBuildNeg(ctx->builder, src0, ""));
    861 }
    862 
    863 static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx,
    864 			       LLVMValueRef src0)
    865 {
    866 	LLVMValueRef cmp, val;
    867 
    868 	cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, "");
    869 	val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, "");
    870 	cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, "");
    871 	val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
    872 	return val;
    873 }
    874 
    875 static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx,
    876 			       LLVMValueRef src0)
    877 {
    878 	LLVMValueRef cmp, val;
    879 
    880 	cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, "");
    881 	val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, "");
    882 	cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, "");
    883 	val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
    884 	return val;
    885 }
    886 
    887 static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx,
    888 				LLVMValueRef src0)
    889 {
    890 	const char *intr = "llvm.floor.f32";
    891 	LLVMValueRef fsrc0 = to_float(ctx, src0);
    892 	LLVMValueRef params[] = {
    893 		fsrc0,
    894 	};
    895 	LLVMValueRef floor = ac_emit_llvm_intrinsic(&ctx->ac, intr,
    896 						 ctx->f32, params, 1,
    897 						 AC_FUNC_ATTR_READNONE);
    898 	return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
    899 }
    900 
    901 static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx,
    902 				    const char *intrin,
    903 				    LLVMValueRef src0, LLVMValueRef src1)
    904 {
    905 	LLVMTypeRef ret_type;
    906 	LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
    907 	LLVMValueRef res;
    908 	LLVMValueRef params[] = { src0, src1 };
    909 	ret_type = LLVMStructTypeInContext(ctx->context, types,
    910 					   2, true);
    911 
    912 	res = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ret_type,
    913 				  params, 2, AC_FUNC_ATTR_READNONE);
    914 
    915 	res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
    916 	res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
    917 	return res;
    918 }
    919 
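        /* Convert a 32-bit boolean (0 or ~0) to 0.0f/1.0f by ANDing it with the
         * bit pattern of 1.0f. */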
    920 static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
    921 			     LLVMValueRef src0)
    922 {
    923 	return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
    924 }
    925 
    926 static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
    927 				   LLVMValueRef src0, LLVMValueRef src1)
    928 {
    929 	LLVMValueRef dst64, result;
    930 	src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
    931 	src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
    932 
    933 	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
    934 	dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
    935 	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
    936 	return result;
    937 }
    938 
    939 static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
    940 				   LLVMValueRef src0, LLVMValueRef src1)
    941 {
    942 	LLVMValueRef dst64, result;
    943 	src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
    944 	src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
    945 
    946 	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
    947 	dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
    948 	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
    949 	return result;
    950 }
    951 
    952 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
    953 					  const char *intrin,
    954 					  LLVMValueRef srcs[3])
    955 {
    956 	LLVMValueRef result;
    957 	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
    958 	result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
    959 
    960 	result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
    961 	return result;
    962 }
    963 
    964 static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
    965 					 LLVMValueRef src0, LLVMValueRef src1,
    966 					 LLVMValueRef src2, LLVMValueRef src3)
    967 {
    968 	LLVMValueRef bfi_args[3], result;
    969 
    970 	bfi_args[0] = LLVMBuildShl(ctx->builder,
    971 				   LLVMBuildSub(ctx->builder,
    972 						LLVMBuildShl(ctx->builder,
    973 							     ctx->i32one,
    974 							     src3, ""),
    975 						ctx->i32one, ""),
    976 				   src2, "");
    977 	bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
    978 	bfi_args[2] = src0;
    979 
    980 	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
    981 
    982 	/* Calculate:
    983 	 *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
    984 	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
    985 	 */
    986 	result = LLVMBuildXor(ctx->builder, bfi_args[2],
    987 			      LLVMBuildAnd(ctx->builder, bfi_args[0],
    988 					   LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
    989 
    990 	result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
    991 	return result;
    992 }
    993 
    994 static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx,
    995 					LLVMValueRef src0)
    996 {
    997 	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
    998 	int i;
    999 	LLVMValueRef comp[2];
   1000 
   1001 	src0 = to_float(ctx, src0);
   1002 	comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "");
   1003 	comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "");
   1004 	for (i = 0; i < 2; i++) {
   1005 		comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
   1006 		comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
   1007 		comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
   1008 	}
   1009 
   1010 	comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
   1011 	comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
   1012 
   1013 	return comp[0];
   1014 }
   1015 
   1016 static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx,
   1017 					  LLVMValueRef src0)
   1018 {
   1019 	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
   1020 	LLVMValueRef temps[2], result, val;
   1021 	int i;
   1022 
   1023 	for (i = 0; i < 2; i++) {
   1024 		val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
   1025 		val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
   1026 		val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
   1027 		temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
   1028 	}
   1029 
   1030 	result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
   1031 					ctx->i32zero, "");
   1032 	result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
   1033 					ctx->i32one, "");
   1034 	return result;
   1035 }
   1036 
   1037 /**
   1038  * Set range metadata on an instruction.  This can only be used on load and
   1039  * call instructions.  If you know an instruction can only produce the values
   1040  * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
   1041  * \p lo is the minimum value inclusive.
   1042  * \p hi is the maximum value exclusive.
   1043  */
   1044 static void set_range_metadata(struct nir_to_llvm_context *ctx,
   1045 			       LLVMValueRef value, unsigned lo, unsigned hi)
   1046 {
   1047 	LLVMValueRef range_md, md_args[2];
   1048 	LLVMTypeRef type = LLVMTypeOf(value);
   1049 	LLVMContextRef context = LLVMGetTypeContext(type);
   1050 
   1051 	md_args[0] = LLVMConstInt(type, lo, false);
   1052 	md_args[1] = LLVMConstInt(type, hi, false);
   1053 	range_md = LLVMMDNodeInContext(context, md_args, 2);
   1054 	LLVMSetMetadata(value, ctx->range_md_kind, range_md);
   1055 }
   1056 
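        /* Thread index within the wavefront (0..63), computed with the
         * mbcnt.lo/mbcnt.hi intrinsics over a full execution mask. */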
   1057 static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
   1058 {
   1059 	LLVMValueRef tid;
   1060 	LLVMValueRef tid_args[2];
   1061 	tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
   1062 	tid_args[1] = ctx->i32zero;
   1063 	tid_args[1] = ac_emit_llvm_intrinsic(&ctx->ac,
   1064 					  "llvm.amdgcn.mbcnt.lo", ctx->i32,
   1065 					  tid_args, 2, AC_FUNC_ATTR_READNONE);
   1066 
   1067 	tid = ac_emit_llvm_intrinsic(&ctx->ac,
   1068 				  "llvm.amdgcn.mbcnt.hi", ctx->i32,
   1069 				  tid_args, 2, AC_FUNC_ATTR_READNONE);
   1070 	set_range_metadata(ctx, tid, 0, 64);
   1071 	return tid;
   1072 }
   1073 
   1074 /*
   1075  * SI implements derivatives using the local data store (LDS)
   1076  * All writes to the LDS happen in all executing threads at
   1077  * the same time. TID is the Thread ID for the current
   1078  * thread and is a value between 0 and 63, representing
   1079  * the thread's position in the wavefront.
   1080  *
   1081  * For the pixel shader, threads are grouped into quads of four pixels.
   1082  * The TIDs of the pixels of a quad are:
   1083  *
   1084  *  +------+------+
   1085  *  |4n + 0|4n + 1|
   1086  *  +------+------+
   1087  *  |4n + 2|4n + 3|
   1088  *  +------+------+
   1089  *
   1090  * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
   1091  * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
   1092  * the current pixel's column, and masking with 0xfffffffe yields the TID
   1093  * of the left pixel of the current pixel's row.
   1094  *
   1095  * Adding 1 yields the TID of the pixel to the right of the left pixel, and
   1096  * adding 2 yields the TID of the pixel below the top pixel.
   1097  */
   1098 /* masks for thread ID. */
   1099 #define TID_MASK_TOP_LEFT 0xfffffffc
   1100 #define TID_MASK_TOP      0xfffffffd
   1101 #define TID_MASK_LEFT     0xfffffffe
   1102 static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
   1103 			      nir_op op,
   1104 			      LLVMValueRef src0)
   1105 {
   1106 	LLVMValueRef tl, trbl, result;
   1107 	LLVMValueRef tl_tid, trbl_tid;
   1108 	LLVMValueRef args[2];
   1109 	LLVMValueRef thread_id;
   1110 	unsigned mask;
   1111 	int idx;
   1112 	ctx->has_ddxy = true;
   1113 
   1114 	if (!ctx->lds && !ctx->has_ds_bpermute)
   1115 		ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
   1116 						       LLVMArrayType(ctx->i32, 64),
   1117 						       "ddxy_lds", LOCAL_ADDR_SPACE);
   1118 
   1119 	thread_id = get_thread_id(ctx);
   1120 	if (op == nir_op_fddx_fine || op == nir_op_fddx)
   1121 		mask = TID_MASK_LEFT;
   1122 	else if (op == nir_op_fddy_fine || op == nir_op_fddy)
   1123 		mask = TID_MASK_TOP;
   1124 	else
   1125 		mask = TID_MASK_TOP_LEFT;
   1126 
   1127 	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
   1128 			      LLVMConstInt(ctx->i32, mask, false), "");
   1129 	/* For DDX we want the next X pixel, for DDY the next Y pixel. */
   1130 	if (op == nir_op_fddx_fine ||
   1131 	    op == nir_op_fddx_coarse ||
   1132 	    op == nir_op_fddx)
   1133 		idx = 1;
   1134 	else
   1135 		idx = 2;
   1136 
   1137 	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
   1138 				LLVMConstInt(ctx->i32, idx, false), "");
   1139 
   1140 	if (ctx->has_ds_bpermute) {
   1141 		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
   1142 				       LLVMConstInt(ctx->i32, 4, false), "");
   1143 		args[1] = src0;
   1144 		tl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
   1145 					 ctx->i32, args, 2,
   1146 					 AC_FUNC_ATTR_READNONE);
   1147 
   1148 		args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
   1149 				       LLVMConstInt(ctx->i32, 4, false), "");
   1150 		trbl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
   1151 					   ctx->i32, args, 2,
   1152 					   AC_FUNC_ATTR_READNONE);
   1153 	} else {
   1154 		LLVMValueRef store_ptr, load_ptr0, load_ptr1;
   1155 
   1156 		store_ptr = build_gep0(ctx, ctx->lds, thread_id);
   1157 		load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
   1158 		load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
   1159 
   1160 		LLVMBuildStore(ctx->builder, src0, store_ptr);
   1161 		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
   1162 		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
   1163 	}
   1164 	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
   1165 	trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
   1166 	result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
   1167 	return result;
   1168 }
   1169 
   1170 /*
   1171  * This takes an (I, J) coordinate pair
   1172  * and works out the X and Y derivatives.
   1173  * It returns DDX(I), DDX(J), DDY(I), DDY(J).
   1174  */
   1175 static LLVMValueRef emit_ddxy_interp(
   1176 	struct nir_to_llvm_context *ctx,
   1177 	LLVMValueRef interp_ij)
   1178 {
   1179 	LLVMValueRef result[4], a;
   1180 	unsigned i;
   1181 
   1182 	for (i = 0; i < 2; i++) {
   1183 		a = LLVMBuildExtractElement(ctx->builder, interp_ij,
   1184 					    LLVMConstInt(ctx->i32, i, false), "");
   1185 		result[i] = emit_ddxy(ctx, nir_op_fddx, a);
   1186 		result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
   1187 	}
   1188 	return ac_build_gather_values(&ctx->ac, result, 4);
   1189 }
   1190 
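        /* Translate one NIR ALU instruction: gather the swizzled sources, emit the
         * matching LLVM IR or intrinsic, and record the result (bitcast to integer)
         * in the SSA def table. */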
   1191 static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
   1192 {
   1193 	LLVMValueRef src[4], result = NULL;
   1194 	unsigned num_components = instr->dest.dest.ssa.num_components;
   1195 	unsigned src_components;
   1196 
   1197 	assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
   1198 	switch (instr->op) {
   1199 	case nir_op_vec2:
   1200 	case nir_op_vec3:
   1201 	case nir_op_vec4:
   1202 		src_components = 1;
   1203 		break;
   1204 	case nir_op_pack_half_2x16:
   1205 		src_components = 2;
   1206 		break;
   1207 	case nir_op_unpack_half_2x16:
   1208 		src_components = 1;
   1209 		break;
   1210 	default:
   1211 		src_components = num_components;
   1212 		break;
   1213 	}
   1214 	for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
   1215 		src[i] = get_alu_src(ctx, instr->src[i], src_components);
   1216 
   1217 	switch (instr->op) {
   1218 	case nir_op_fmov:
   1219 	case nir_op_imov:
   1220 		result = src[0];
   1221 		break;
   1222 	case nir_op_fneg:
   1223 	        src[0] = to_float(ctx, src[0]);
   1224 		result = LLVMBuildFNeg(ctx->builder, src[0], "");
   1225 		break;
   1226 	case nir_op_ineg:
   1227 		result = LLVMBuildNeg(ctx->builder, src[0], "");
   1228 		break;
   1229 	case nir_op_inot:
   1230 		result = LLVMBuildNot(ctx->builder, src[0], "");
   1231 		break;
   1232 	case nir_op_iadd:
   1233 		result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
   1234 		break;
   1235 	case nir_op_fadd:
   1236 		src[0] = to_float(ctx, src[0]);
   1237 		src[1] = to_float(ctx, src[1]);
   1238 		result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
   1239 		break;
   1240 	case nir_op_fsub:
   1241 		src[0] = to_float(ctx, src[0]);
   1242 		src[1] = to_float(ctx, src[1]);
   1243 		result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
   1244 		break;
   1245 	case nir_op_isub:
   1246 		result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
   1247 		break;
   1248 	case nir_op_imul:
   1249 		result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
   1250 		break;
   1251 	case nir_op_imod:
   1252 		result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
   1253 		break;
   1254 	case nir_op_umod:
   1255 		result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
   1256 		break;
   1257 	case nir_op_fmod:
   1258 		src[0] = to_float(ctx, src[0]);
   1259 		src[1] = to_float(ctx, src[1]);
   1260 		result = ac_emit_fdiv(&ctx->ac, src[0], src[1]);
   1261 		result = emit_intrin_1f_param(ctx, "llvm.floor.f32", result);
   1262 		result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
   1263 		result = LLVMBuildFSub(ctx->builder, src[0], result, "");
   1264 		break;
   1265 	case nir_op_frem:
   1266 		src[0] = to_float(ctx, src[0]);
   1267 		src[1] = to_float(ctx, src[1]);
   1268 		result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
   1269 		break;
   1270 	case nir_op_irem:
   1271 		result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
   1272 		break;
   1273 	case nir_op_idiv:
   1274 		result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
   1275 		break;
   1276 	case nir_op_udiv:
   1277 		result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
   1278 		break;
   1279 	case nir_op_fmul:
   1280 		src[0] = to_float(ctx, src[0]);
   1281 		src[1] = to_float(ctx, src[1]);
   1282 		result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
   1283 		break;
   1284 	case nir_op_fdiv:
   1285 		src[0] = to_float(ctx, src[0]);
   1286 		src[1] = to_float(ctx, src[1]);
   1287 		result = ac_emit_fdiv(&ctx->ac, src[0], src[1]);
   1288 		break;
   1289 	case nir_op_frcp:
   1290 		src[0] = to_float(ctx, src[0]);
   1291 		result = ac_emit_fdiv(&ctx->ac, ctx->f32one, src[0]);
   1292 		break;
   1293 	case nir_op_iand:
   1294 		result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
   1295 		break;
   1296 	case nir_op_ior:
   1297 		result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
   1298 		break;
   1299 	case nir_op_ixor:
   1300 		result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
   1301 		break;
   1302 	case nir_op_ishl:
   1303 		result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
   1304 		break;
   1305 	case nir_op_ishr:
   1306 		result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
   1307 		break;
   1308 	case nir_op_ushr:
   1309 		result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
   1310 		break;
   1311 	case nir_op_ilt:
   1312 		result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]);
   1313 		break;
   1314 	case nir_op_ine:
   1315 		result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]);
   1316 		break;
   1317 	case nir_op_ieq:
   1318 		result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]);
   1319 		break;
   1320 	case nir_op_ige:
   1321 		result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]);
   1322 		break;
   1323 	case nir_op_ult:
   1324 		result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]);
   1325 		break;
   1326 	case nir_op_uge:
   1327 		result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]);
   1328 		break;
   1329 	case nir_op_feq:
   1330 		result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]);
   1331 		break;
   1332 	case nir_op_fne:
   1333 		result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]);
   1334 		break;
   1335 	case nir_op_flt:
   1336 		result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]);
   1337 		break;
   1338 	case nir_op_fge:
   1339 		result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]);
   1340 		break;
   1341 	case nir_op_fabs:
   1342 		result = emit_intrin_1f_param(ctx, "llvm.fabs.f32", src[0]);
   1343 		break;
   1344 	case nir_op_iabs:
   1345 		result = emit_iabs(ctx, src[0]);
   1346 		break;
   1347 	case nir_op_imax:
   1348 		result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]);
   1349 		break;
   1350 	case nir_op_imin:
   1351 		result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]);
   1352 		break;
   1353 	case nir_op_umax:
   1354 		result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]);
   1355 		break;
   1356 	case nir_op_umin:
   1357 		result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]);
   1358 		break;
   1359 	case nir_op_isign:
   1360 		result = emit_isign(ctx, src[0]);
   1361 		break;
   1362 	case nir_op_fsign:
   1363 		src[0] = to_float(ctx, src[0]);
   1364 		result = emit_fsign(ctx, src[0]);
   1365 		break;
   1366 	case nir_op_ffloor:
   1367 		result = emit_intrin_1f_param(ctx, "llvm.floor.f32", src[0]);
   1368 		break;
   1369 	case nir_op_ftrunc:
   1370 		result = emit_intrin_1f_param(ctx, "llvm.trunc.f32", src[0]);
   1371 		break;
   1372 	case nir_op_fceil:
   1373 		result = emit_intrin_1f_param(ctx, "llvm.ceil.f32", src[0]);
   1374 		break;
   1375 	case nir_op_fround_even:
   1376 		result = emit_intrin_1f_param(ctx, "llvm.rint.f32", src[0]);
   1377 		break;
   1378 	case nir_op_ffract:
   1379 		result = emit_ffract(ctx, src[0]);
   1380 		break;
   1381 	case nir_op_fsin:
   1382 		result = emit_intrin_1f_param(ctx, "llvm.sin.f32", src[0]);
   1383 		break;
   1384 	case nir_op_fcos:
   1385 		result = emit_intrin_1f_param(ctx, "llvm.cos.f32", src[0]);
   1386 		break;
   1387 	case nir_op_fsqrt:
   1388 		result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
   1389 		break;
   1390 	case nir_op_fexp2:
   1391 		result = emit_intrin_1f_param(ctx, "llvm.exp2.f32", src[0]);
   1392 		break;
   1393 	case nir_op_flog2:
   1394 		result = emit_intrin_1f_param(ctx, "llvm.log2.f32", src[0]);
   1395 		break;
   1396 	case nir_op_frsq:
   1397 		result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
   1398 		result = ac_emit_fdiv(&ctx->ac, ctx->f32one, result);
   1399 		break;
   1400 	case nir_op_fpow:
   1401 		result = emit_intrin_2f_param(ctx, "llvm.pow.f32", src[0], src[1]);
   1402 		break;
   1403 	case nir_op_fmax:
   1404 		result = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", src[0], src[1]);
   1405 		break;
   1406 	case nir_op_fmin:
   1407 		result = emit_intrin_2f_param(ctx, "llvm.minnum.f32", src[0], src[1]);
   1408 		break;
   1409 	case nir_op_ffma:
   1410 		result = emit_intrin_3f_param(ctx, "llvm.fma.f32", src[0], src[1], src[2]);
   1411 		break;
   1412 	case nir_op_ibitfield_extract:
   1413 		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
   1414 		break;
   1415 	case nir_op_ubitfield_extract:
   1416 		result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
   1417 		break;
   1418 	case nir_op_bitfield_insert:
   1419 		result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
   1420 		break;
   1421 	case nir_op_bitfield_reverse:
   1422 		result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
   1423 		break;
   1424 	case nir_op_bit_count:
   1425 		result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
   1426 		break;
   1427 	case nir_op_vec2:
   1428 	case nir_op_vec3:
   1429 	case nir_op_vec4:
   1430 		for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
   1431 			src[i] = to_integer(ctx, src[i]);
   1432 		result = ac_build_gather_values(&ctx->ac, src, num_components);
   1433 		break;
   1434 	case nir_op_f2i:
   1435 		src[0] = to_float(ctx, src[0]);
   1436 		result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, "");
   1437 		break;
   1438 	case nir_op_f2u:
   1439 		src[0] = to_float(ctx, src[0]);
   1440 		result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, "");
   1441 		break;
   1442 	case nir_op_i2f:
   1443 		result = LLVMBuildSIToFP(ctx->builder, src[0], ctx->f32, "");
   1444 		break;
   1445 	case nir_op_u2f:
   1446 		result = LLVMBuildUIToFP(ctx->builder, src[0], ctx->f32, "");
   1447 		break;
   1448 	case nir_op_bcsel:
   1449 		result = emit_bcsel(ctx, src[0], src[1], src[2]);
   1450 		break;
   1451 	case nir_op_find_lsb:
   1452 		result = emit_find_lsb(ctx, src[0]);
   1453 		break;
   1454 	case nir_op_ufind_msb:
   1455 		result = emit_ufind_msb(ctx, src[0]);
   1456 		break;
   1457 	case nir_op_ifind_msb:
   1458 		result = emit_ifind_msb(ctx, src[0]);
   1459 		break;
   1460 	case nir_op_uadd_carry:
   1461 		result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]);
   1462 		break;
   1463 	case nir_op_usub_borrow:
   1464 		result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]);
   1465 		break;
   1466 	case nir_op_b2f:
   1467 		result = emit_b2f(ctx, src[0]);
   1468 		break;
   1469 	case nir_op_fquantize2f16:
   1470 		src[0] = to_float(ctx, src[0]);
   1471 		result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
   1472 		/* need to convert back up to f32 */
   1473 		result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
   1474 		break;
   1475 	case nir_op_umul_high:
   1476 		result = emit_umul_high(ctx, src[0], src[1]);
   1477 		break;
   1478 	case nir_op_imul_high:
   1479 		result = emit_imul_high(ctx, src[0], src[1]);
   1480 		break;
   1481 	case nir_op_pack_half_2x16:
   1482 		result = emit_pack_half_2x16(ctx, src[0]);
   1483 		break;
   1484 	case nir_op_unpack_half_2x16:
   1485 		result = emit_unpack_half_2x16(ctx, src[0]);
   1486 		break;
   1487 	case nir_op_fddx:
   1488 	case nir_op_fddy:
   1489 	case nir_op_fddx_fine:
   1490 	case nir_op_fddy_fine:
   1491 	case nir_op_fddx_coarse:
   1492 	case nir_op_fddy_coarse:
   1493 		result = emit_ddxy(ctx, instr->op, src[0]);
   1494 		break;
   1495 	default:
   1496 		fprintf(stderr, "Unknown NIR alu instr: ");
   1497 		nir_print_instr(&instr->instr, stderr);
   1498 		fprintf(stderr, "\n");
   1499 		abort();
   1500 	}
   1501 
   1502 	if (result) {
   1503 		assert(instr->dest.dest.is_ssa);
   1504 		result = to_integer(ctx, result);
   1505 		_mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
   1506 		                        result);
   1507 	}
   1508 }
   1509 
   1510 static void visit_load_const(struct nir_to_llvm_context *ctx,
   1511                              nir_load_const_instr *instr)
   1512 {
   1513 	LLVMValueRef values[4], value = NULL;
   1514 	LLVMTypeRef element_type =
   1515 	    LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
   1516 
   1517 	for (unsigned i = 0; i < instr->def.num_components; ++i) {
   1518 		switch (instr->def.bit_size) {
   1519 		case 32:
   1520 			values[i] = LLVMConstInt(element_type,
   1521 			                         instr->value.u32[i], false);
   1522 			break;
   1523 		case 64:
   1524 			values[i] = LLVMConstInt(element_type,
   1525 			                         instr->value.u64[i], false);
   1526 			break;
   1527 		default:
   1528 			fprintf(stderr,
   1529 			        "unsupported nir load_const bit_size: %d\n",
   1530 			        instr->def.bit_size);
   1531 			abort();
   1532 		}
   1533 	}
   1534 	if (instr->def.num_components > 1) {
   1535 		value = LLVMConstVector(values, instr->def.num_components);
   1536 	} else
   1537 		value = values[0];
   1538 
   1539 	_mesa_hash_table_insert(ctx->defs, &instr->def, value);
   1540 }
   1541 
   1542 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
   1543                              LLVMTypeRef type)
   1544 {
   1545 	int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
   1546 	return LLVMBuildBitCast(ctx->builder, ptr,
   1547 	                        LLVMPointerType(type, addr_space), "");
   1548 }
   1549 
   1550 static LLVMValueRef
   1551 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
   1552 {
   1553 	LLVMValueRef size =
   1554 		LLVMBuildExtractElement(ctx->builder, descriptor,
   1555 					LLVMConstInt(ctx->i32, 2, false), "");
   1556 
    1557 	/* VI and newer only */
   1558 	if (ctx->options->chip_class >= VI && in_elements) {
   1559 		/* On VI, the descriptor contains the size in bytes,
   1560 		 * but TXQ must return the size in elements.
   1561 		 * The stride is always non-zero for resources using TXQ.
   1562 		 */
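         		/* Worked example (illustrative values): if dword1 were 0x00100000,
         		 * the shift and mask below give stride = 16 bytes, so a 256-byte
         		 * buffer would report 256 / 16 = 16 elements.
         		 */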
   1563 		LLVMValueRef stride =
   1564 			LLVMBuildExtractElement(ctx->builder, descriptor,
   1565 						LLVMConstInt(ctx->i32, 1, false), "");
   1566 		stride = LLVMBuildLShr(ctx->builder, stride,
   1567 				       LLVMConstInt(ctx->i32, 16, false), "");
   1568 		stride = LLVMBuildAnd(ctx->builder, stride,
   1569 				      LLVMConstInt(ctx->i32, 0x3fff, false), "");
   1570 
   1571 		size = LLVMBuildUDiv(ctx->builder, size, stride, "");
   1572 	}
   1573 	return size;
   1574 }
   1575 
   1576 /**
   1577  * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
   1578  * intrinsic names).
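          * For example, a <4 x i32> coordinate type yields "v4i32" and a plain
          * i32 yields "i32", matching the type suffix appended to the llvm.SI.*
          * intrinsic names built below.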
   1579  */
   1580 static void build_int_type_name(
   1581 	LLVMTypeRef type,
   1582 	char *buf, unsigned bufsize)
   1583 {
   1584 	assert(bufsize >= 6);
   1585 
   1586 	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
   1587 		snprintf(buf, bufsize, "v%ui32",
   1588 			 LLVMGetVectorSize(type));
   1589 	else
   1590 		strcpy(buf, "i32");
   1591 }
   1592 
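         /* Gather4 of integer-format textures: the code below queries the texture
          * size with getresinfo, turns it into a half-texel offset (the -0.5
          * factor), and adds that offset to the two coordinate VGPRs before
          * re-emitting the gather intrinsic.  This only describes what the code
          * does; the hardware rationale for the adjustment is not stated here.
          */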
   1593 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
   1594 					       struct ac_tex_info *tinfo,
   1595 					       nir_tex_instr *instr,
   1596 					       const char *intr_name,
   1597 					       unsigned coord_vgpr_index)
   1598 {
   1599 	LLVMValueRef coord = tinfo->args[0];
   1600 	LLVMValueRef half_texel[2];
   1601 	int c;
   1602 
    1603 	/* TODO: handle rectangle textures */
   1604 	{
   1605 		LLVMValueRef txq_args[10];
   1606 		int txq_arg_count = 0;
   1607 		LLVMValueRef size;
   1608 		bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
   1609 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false);
   1610 		txq_args[txq_arg_count++] = tinfo->args[1];
   1611 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
   1612 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
   1613 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
   1614 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
   1615 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
   1616 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
   1617 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
   1618 		txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
   1619 		size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
   1620 					   txq_args, txq_arg_count,
   1621 					   AC_FUNC_ATTR_READNONE);
   1622 
   1623 		for (c = 0; c < 2; c++) {
   1624 			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
   1625 								LLVMConstInt(ctx->i32, c, false), "");
   1626 			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
   1627 			half_texel[c] = ac_emit_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
   1628 			half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
   1629 						      LLVMConstReal(ctx->f32, -0.5), "");
   1630 		}
   1631 	}
   1632 
   1633 	for (c = 0; c < 2; c++) {
   1634 		LLVMValueRef tmp;
   1635 		LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
   1636 		tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
   1637 		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
   1638 		tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
   1639 		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
   1640 		coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
   1641 	}
   1642 
   1643 	tinfo->args[0] = coord;
   1644 	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
   1645 				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
   1646 
   1647 }
   1648 
   1649 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
   1650 					nir_tex_instr *instr,
   1651 					struct ac_tex_info *tinfo)
   1652 {
   1653 	const char *name = "llvm.SI.image.sample";
   1654 	const char *infix = "";
   1655 	char intr_name[127];
   1656 	char type[64];
   1657 	bool is_shadow = instr->is_shadow;
   1658 	bool has_offset = tinfo->has_offset;
   1659 	switch (instr->op) {
   1660 	case nir_texop_txf:
   1661 	case nir_texop_txf_ms:
   1662 	case nir_texop_samples_identical:
   1663 		name = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? "llvm.SI.image.load" :
   1664 		       instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? "llvm.SI.vs.load.input" :
   1665 			"llvm.SI.image.load.mip";
   1666 		is_shadow = false;
   1667 		has_offset = false;
   1668 		break;
   1669 	case nir_texop_txb:
   1670 		infix = ".b";
   1671 		break;
   1672 	case nir_texop_txl:
   1673 		infix = ".l";
   1674 		break;
   1675 	case nir_texop_txs:
   1676 		name = "llvm.SI.getresinfo";
   1677 		break;
   1678 	case nir_texop_query_levels:
   1679 		name = "llvm.SI.getresinfo";
   1680 		break;
   1681 	case nir_texop_tex:
   1682 		if (ctx->stage != MESA_SHADER_FRAGMENT)
   1683 			infix = ".lz";
   1684 		break;
   1685 	case nir_texop_txd:
   1686 		infix = ".d";
   1687 		break;
   1688 	case nir_texop_tg4:
   1689 		name = "llvm.SI.gather4";
   1690 		infix = ".lz";
   1691 		break;
   1692 	case nir_texop_lod:
   1693 		name = "llvm.SI.getlod";
   1694 		is_shadow = false;
   1695 		has_offset = false;
   1696 		break;
   1697 	default:
   1698 		break;
   1699 	}
   1700 
   1701 	build_int_type_name(LLVMTypeOf(tinfo->args[0]), type, sizeof(type));
   1702 	sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix,
   1703 		has_offset ? ".o" : "", type);
   1704 
   1705 	if (instr->op == nir_texop_tg4) {
   1706 		enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
   1707 		if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
   1708 			return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
   1709 							  (int)has_offset + (int)is_shadow);
   1710 		}
   1711 	}
   1712 	return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
   1713 				   AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
   1714 
   1715 }
   1716 
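         /* Resolves a vulkan_resource_index intrinsic to a v4i32 buffer descriptor.
          * The descriptor is loaded from set pointer + binding offset + index *
          * stride, except for dynamic uniform/storage buffers, whose descriptors
          * are placed after the push constants with a fixed stride of 16 per
          * dynamic binding (see the constants below).
          */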
   1717 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
   1718                                                 nir_intrinsic_instr *instr)
   1719 {
   1720 	LLVMValueRef index = get_src(ctx, instr->src[0]);
   1721 	unsigned desc_set = nir_intrinsic_desc_set(instr);
   1722 	unsigned binding = nir_intrinsic_binding(instr);
   1723 	LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
   1724 	struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
   1725 	struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
   1726 	unsigned base_offset = layout->binding[binding].offset;
   1727 	LLVMValueRef offset, stride;
   1728 
   1729 	if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
   1730 	    layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
   1731 		unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
   1732 			layout->binding[binding].dynamic_offset_offset;
   1733 		desc_ptr = ctx->push_constants;
   1734 		base_offset = pipeline_layout->push_constant_size + 16 * idx;
   1735 		stride = LLVMConstInt(ctx->i32, 16, false);
   1736 	} else
   1737 		stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
   1738 
   1739 	offset = LLVMConstInt(ctx->i32, base_offset, false);
   1740 	index = LLVMBuildMul(ctx->builder, index, stride, "");
   1741 	offset = LLVMBuildAdd(ctx->builder, offset, index, "");
   1742 
   1743 	desc_ptr = build_gep0(ctx, desc_ptr, offset);
   1744 	desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
   1745 	LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
   1746 
   1747 	return LLVMBuildLoad(ctx->builder, desc_ptr, "");
   1748 }
   1749 
   1750 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
   1751                                              nir_intrinsic_instr *instr)
   1752 {
   1753 	LLVMValueRef ptr, addr;
   1754 
   1755 	addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0);
   1756 	addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), "");
   1757 
   1758 	ptr = build_gep0(ctx, ctx->push_constants, addr);
   1759 	ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
   1760 
   1761 	return LLVMBuildLoad(ctx->builder, ptr, "");
   1762 }
   1763 
   1764 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
   1765                                           nir_intrinsic_instr *instr)
   1766 {
   1767 	LLVMValueRef desc = get_src(ctx, instr->src[0]);
   1768 
   1769 	return get_buffer_size(ctx, desc, false);
   1770 }
   1771 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
   1772                              nir_intrinsic_instr *instr)
   1773 {
   1774 	const char *store_name;
   1775 	LLVMTypeRef data_type = ctx->f32;
   1776 	unsigned writemask = nir_intrinsic_write_mask(instr);
   1777 	LLVMValueRef base_data, base_offset;
   1778 	LLVMValueRef params[6];
   1779 
   1780 	if (ctx->stage == MESA_SHADER_FRAGMENT)
   1781 		ctx->shader_info->fs.writes_memory = true;
   1782 
   1783 	params[1] = get_src(ctx, instr->src[1]);
   1784 	params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
   1785 	params[4] = LLVMConstInt(ctx->i1, 0, false);  /* glc */
   1786 	params[5] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
   1787 
   1788 	if (instr->num_components > 1)
   1789 		data_type = LLVMVectorType(ctx->f32, instr->num_components);
   1790 
   1791 	base_data = to_float(ctx, get_src(ctx, instr->src[0]));
   1792 	base_data = trim_vector(ctx, base_data, instr->num_components);
   1793 	base_data = LLVMBuildBitCast(ctx->builder, base_data,
   1794 				     data_type, "");
   1795 	base_offset = get_src(ctx, instr->src[2]);      /* voffset */
   1796 	while (writemask) {
   1797 		int start, count;
   1798 		LLVMValueRef data;
   1799 		LLVMValueRef offset;
   1800 		LLVMValueRef tmp;
   1801 		u_bit_scan_consecutive_range(&writemask, &start, &count);
   1802 
   1803 		/* Due to an LLVM limitation, split 3-element writes
   1804 		 * into a 2-element and a 1-element write. */
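         		/* e.g. a writemask of 0x7 becomes a v2f32 store of components 0-1
         		 * followed, on the next loop iteration, by an f32 store of
         		 * component 2 at byte offset 2 * 4 = 8 from the base offset.
         		 */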
   1805 		if (count == 3) {
   1806 			writemask |= 1 << (start + 2);
   1807 			count = 2;
   1808 		}
   1809 
   1810 		if (count == 4) {
   1811 			store_name = "llvm.amdgcn.buffer.store.v4f32";
   1812 			data = base_data;
   1813 		} else if (count == 2) {
   1814 			tmp = LLVMBuildExtractElement(ctx->builder,
   1815 						      base_data, LLVMConstInt(ctx->i32, start, false), "");
   1816 			data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
   1817 						      ctx->i32zero, "");
   1818 
   1819 			tmp = LLVMBuildExtractElement(ctx->builder,
   1820 						      base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
   1821 			data = LLVMBuildInsertElement(ctx->builder, data, tmp,
   1822 						      ctx->i32one, "");
   1823 			store_name = "llvm.amdgcn.buffer.store.v2f32";
   1824 
   1825 		} else {
   1826 			assert(count == 1);
   1827 			if (get_llvm_num_components(base_data) > 1)
   1828 				data = LLVMBuildExtractElement(ctx->builder, base_data,
   1829 							       LLVMConstInt(ctx->i32, start, false), "");
   1830 			else
   1831 				data = base_data;
   1832 			store_name = "llvm.amdgcn.buffer.store.f32";
   1833 		}
   1834 
   1835 		offset = base_offset;
   1836 		if (start != 0) {
   1837 			offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
   1838 		}
   1839 		params[0] = data;
   1840 		params[3] = offset;
   1841 		ac_emit_llvm_intrinsic(&ctx->ac, store_name,
   1842 				       ctx->voidt, params, 6, 0);
   1843 	}
   1844 }
   1845 
   1846 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
   1847                                       nir_intrinsic_instr *instr)
   1848 {
   1849 	const char *name;
   1850 	LLVMValueRef params[6];
   1851 	int arg_count = 0;
   1852 	if (ctx->stage == MESA_SHADER_FRAGMENT)
   1853 		ctx->shader_info->fs.writes_memory = true;
   1854 
   1855 	if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
   1856 		params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
   1857 	}
   1858 	params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
   1859 	params[arg_count++] = get_src(ctx, instr->src[0]);
   1860 	params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
   1861 	params[arg_count++] = get_src(ctx, instr->src[1]);      /* voffset */
   1862 	params[arg_count++] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
   1863 
   1864 	switch (instr->intrinsic) {
   1865 	case nir_intrinsic_ssbo_atomic_add:
   1866 		name = "llvm.amdgcn.buffer.atomic.add";
   1867 		break;
   1868 	case nir_intrinsic_ssbo_atomic_imin:
   1869 		name = "llvm.amdgcn.buffer.atomic.smin";
   1870 		break;
   1871 	case nir_intrinsic_ssbo_atomic_umin:
   1872 		name = "llvm.amdgcn.buffer.atomic.umin";
   1873 		break;
   1874 	case nir_intrinsic_ssbo_atomic_imax:
   1875 		name = "llvm.amdgcn.buffer.atomic.smax";
   1876 		break;
   1877 	case nir_intrinsic_ssbo_atomic_umax:
   1878 		name = "llvm.amdgcn.buffer.atomic.umax";
   1879 		break;
   1880 	case nir_intrinsic_ssbo_atomic_and:
   1881 		name = "llvm.amdgcn.buffer.atomic.and";
   1882 		break;
   1883 	case nir_intrinsic_ssbo_atomic_or:
   1884 		name = "llvm.amdgcn.buffer.atomic.or";
   1885 		break;
   1886 	case nir_intrinsic_ssbo_atomic_xor:
   1887 		name = "llvm.amdgcn.buffer.atomic.xor";
   1888 		break;
   1889 	case nir_intrinsic_ssbo_atomic_exchange:
   1890 		name = "llvm.amdgcn.buffer.atomic.swap";
   1891 		break;
   1892 	case nir_intrinsic_ssbo_atomic_comp_swap:
   1893 		name = "llvm.amdgcn.buffer.atomic.cmpswap";
   1894 		break;
   1895 	default:
   1896 		abort();
   1897 	}
   1898 
   1899 	return ac_emit_llvm_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
   1900 }
   1901 
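         /* SSBO loads go through llvm.amdgcn.buffer.load.{f32,v2f32,v4f32}; a
          * 3-component load is issued as a v4f32 load and trimmed back to three
          * components afterwards.
          */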
   1902 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
   1903                                       nir_intrinsic_instr *instr)
   1904 {
   1905 	const char *load_name;
   1906 	LLVMTypeRef data_type = ctx->f32;
   1907 	if (instr->num_components == 3)
   1908 		data_type = LLVMVectorType(ctx->f32, 4);
   1909 	else if (instr->num_components > 1)
   1910 		data_type = LLVMVectorType(ctx->f32, instr->num_components);
   1911 
   1912 	if (instr->num_components == 4 || instr->num_components == 3)
   1913 		load_name = "llvm.amdgcn.buffer.load.v4f32";
   1914 	else if (instr->num_components == 2)
   1915 		load_name = "llvm.amdgcn.buffer.load.v2f32";
   1916 	else if (instr->num_components == 1)
   1917 		load_name = "llvm.amdgcn.buffer.load.f32";
   1918 	else
   1919 		abort();
   1920 
   1921 	LLVMValueRef params[] = {
   1922 	    get_src(ctx, instr->src[0]),
   1923 	    LLVMConstInt(ctx->i32, 0, false),
   1924 	    get_src(ctx, instr->src[1]),
   1925 	    LLVMConstInt(ctx->i1, 0, false),
   1926 	    LLVMConstInt(ctx->i1, 0, false),
   1927 	};
   1928 
   1929 	LLVMValueRef ret =
   1930 	    ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
   1931 
   1932 	if (instr->num_components == 3)
   1933 		ret = trim_vector(ctx, ret, 3);
   1934 
   1935 	return LLVMBuildBitCast(ctx->builder, ret,
   1936 	                        get_def_type(ctx, &instr->dest.ssa), "");
   1937 }
   1938 
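         /* UBO loads fetch one dword per component through llvm.SI.load.const,
          * advancing the offset by 4 bytes each time, then gather the results
          * back into a single vector.
          */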
   1939 static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
   1940                                           nir_intrinsic_instr *instr)
   1941 {
   1942 	LLVMValueRef results[4], ret;
   1943 	LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
   1944 	LLVMValueRef offset = get_src(ctx, instr->src[1]);
   1945 
   1946 	rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), "");
   1947 
   1948 	for (unsigned i = 0; i < instr->num_components; ++i) {
   1949 		LLVMValueRef params[] = {
   1950 			rsrc,
   1951 			LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
   1952 				     offset, "")
   1953 		};
   1954 		results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
   1955 						 params, 2, AC_FUNC_ATTR_READNONE);
   1956 	}
   1957 
   1958 
   1959 	ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
   1960 	return LLVMBuildBitCast(ctx->builder, ret,
   1961 	                        get_def_type(ctx, &instr->dest.ssa), "");
   1962 }
   1963 
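         /* Walks a NIR deref chain and splits the addressing into a compile-time
          * constant slot offset (*const_out) plus an optional runtime LLVM value
          * (*indir_out) for indirect array indexing: array derefs contribute
          * slot_count * index, struct derefs the summed slot counts of the
          * preceding fields.
          */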
   1964 static void
   1965 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
   1966                       bool vs_in, unsigned *const_out, LLVMValueRef *indir_out)
   1967 {
   1968 	unsigned const_offset = 0;
   1969 	LLVMValueRef offset = NULL;
   1970 
   1971 
   1972 	while (tail->child != NULL) {
   1973 		const struct glsl_type *parent_type = tail->type;
   1974 		tail = tail->child;
   1975 
   1976 		if (tail->deref_type == nir_deref_type_array) {
   1977 			nir_deref_array *deref_array = nir_deref_as_array(tail);
   1978 			LLVMValueRef index, stride, local_offset;
   1979 			unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
   1980 
   1981 			const_offset += size * deref_array->base_offset;
   1982 			if (deref_array->deref_array_type == nir_deref_array_type_direct)
   1983 				continue;
   1984 
   1985 			assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
   1986 			index = get_src(ctx, deref_array->indirect);
   1987 			stride = LLVMConstInt(ctx->i32, size, 0);
   1988 			local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
   1989 
   1990 			if (offset)
   1991 				offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
   1992 			else
   1993 				offset = local_offset;
   1994 		} else if (tail->deref_type == nir_deref_type_struct) {
   1995 			nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
   1996 
   1997 			for (unsigned i = 0; i < deref_struct->index; i++) {
   1998 				const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
   1999 				const_offset += glsl_count_attribute_slots(ft, vs_in);
   2000 			}
   2001 		} else
   2002 			unreachable("unsupported deref type");
   2003 
   2004 	}
   2005 
   2006 	if (const_offset && offset)
   2007 		offset = LLVMBuildAdd(ctx->builder, offset,
   2008 				      LLVMConstInt(ctx->i32, const_offset, 0),
   2009 				      "");
   2010 
   2011 	*const_out = const_offset;
   2012 	*indir_out = offset;
   2013 }
   2014 
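         /* Loads a variable according to its storage class: inputs, outputs and
          * locals are kept as per-channel values (with a gather-then-extract path
          * when the index is indirect), while shared variables are loaded from
          * LDS through a GEP on the shared-memory pointer.
          */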
   2015 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
   2016 				   nir_intrinsic_instr *instr)
   2017 {
   2018 	LLVMValueRef values[4];
   2019 	int idx = instr->variables[0]->var->data.driver_location;
   2020 	int ve = instr->dest.ssa.num_components;
   2021 	LLVMValueRef indir_index;
   2022 	unsigned const_index;
   2023 	switch (instr->variables[0]->var->data.mode) {
   2024 	case nir_var_shader_in:
   2025 		radv_get_deref_offset(ctx, &instr->variables[0]->deref,
   2026 				      ctx->stage == MESA_SHADER_VERTEX,
   2027 				      &const_index, &indir_index);
   2028 		for (unsigned chan = 0; chan < ve; chan++) {
   2029 			if (indir_index) {
   2030 				unsigned count = glsl_count_attribute_slots(
   2031 						instr->variables[0]->var->type,
   2032 						ctx->stage == MESA_SHADER_VERTEX);
   2033 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
   2034 						&ctx->ac, ctx->inputs + idx + chan, count,
   2035 						4, false);
   2036 
   2037 				values[chan] = LLVMBuildExtractElement(ctx->builder,
   2038 								       tmp_vec,
   2039 								       indir_index, "");
   2040 			} else
   2041 				values[chan] = ctx->inputs[idx + chan + const_index * 4];
   2042 		}
   2043 		return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
   2044 		break;
   2045 	case nir_var_local:
   2046 		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
   2047 				      &const_index, &indir_index);
   2048 		for (unsigned chan = 0; chan < ve; chan++) {
   2049 			if (indir_index) {
   2050 				unsigned count = glsl_count_attribute_slots(
   2051 					instr->variables[0]->var->type, false);
   2052 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
   2053 						&ctx->ac, ctx->locals + idx + chan, count,
   2054 						4, true);
   2055 
   2056 				values[chan] = LLVMBuildExtractElement(ctx->builder,
   2057 								       tmp_vec,
   2058 								       indir_index, "");
   2059 			} else {
   2060 				values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
   2061 			}
   2062 		}
   2063 		return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
   2064 	case nir_var_shader_out:
   2065 		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
   2066 				      &const_index, &indir_index);
   2067 		for (unsigned chan = 0; chan < ve; chan++) {
   2068 			if (indir_index) {
   2069 				unsigned count = glsl_count_attribute_slots(
   2070 						instr->variables[0]->var->type, false);
   2071 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
   2072 						&ctx->ac, ctx->outputs + idx + chan, count,
   2073 						4, true);
   2074 
   2075 				values[chan] = LLVMBuildExtractElement(ctx->builder,
   2076 								       tmp_vec,
   2077 								       indir_index, "");
   2078 			} else {
   2079 			values[chan] = LLVMBuildLoad(ctx->builder,
   2080 						     ctx->outputs[idx + chan + const_index * 4],
   2081 						     "");
   2082 			}
   2083 		}
   2084 		return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
   2085 	case nir_var_shared: {
   2086 		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
   2087 				      &const_index, &indir_index);
   2088 		LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
   2089 		LLVMValueRef derived_ptr;
   2090 
   2091 		if (indir_index)
   2092 			indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
   2093 
   2094 		for (unsigned chan = 0; chan < ve; chan++) {
   2095 			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
   2096 			if (indir_index)
   2097 				index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
   2098 			derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
   2099 			values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
   2100 		}
   2101 		return to_integer(ctx, ac_build_gather_values(&ctx->ac, values, ve));
   2102 	}
   2103 	default:
   2104 		break;
   2105 	}
   2106 	return NULL;
   2107 }
   2108 
   2109 static void
   2110 visit_store_var(struct nir_to_llvm_context *ctx,
   2111 				   nir_intrinsic_instr *instr)
   2112 {
   2113 	LLVMValueRef temp_ptr, value;
   2114 	int idx = instr->variables[0]->var->data.driver_location;
   2115 	LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0]));
   2116 	int writemask = instr->const_index[0];
   2117 	LLVMValueRef indir_index;
   2118 	unsigned const_index;
   2119 	switch (instr->variables[0]->var->data.mode) {
   2120 	case nir_var_shader_out:
   2121 		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
   2122 				      &const_index, &indir_index);
   2123 		for (unsigned chan = 0; chan < 4; chan++) {
   2124 			int stride = 4;
   2125 			if (!(writemask & (1 << chan)))
   2126 				continue;
   2127 			if (get_llvm_num_components(src) == 1)
   2128 				value = src;
   2129 			else
   2130 				value = LLVMBuildExtractElement(ctx->builder, src,
   2131 								LLVMConstInt(ctx->i32,
   2132 									     chan, false),
   2133 								"");
   2134 
   2135 			if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 ||
   2136 			    instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0)
   2137 				stride = 1;
   2138 			if (indir_index) {
   2139 				unsigned count = glsl_count_attribute_slots(
   2140 						instr->variables[0]->var->type, false);
   2141 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
   2142 						&ctx->ac, ctx->outputs + idx + chan, count,
   2143 						stride, true);
   2144 
   2145 				if (get_llvm_num_components(tmp_vec) > 1) {
   2146 					tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
   2147 									 value, indir_index, "");
   2148 				} else
   2149 					tmp_vec = value;
   2150 				build_store_values_extended(ctx, ctx->outputs + idx + chan,
   2151 							    count, stride, tmp_vec);
   2152 
   2153 			} else {
   2154 				temp_ptr = ctx->outputs[idx + chan + const_index * stride];
   2155 
   2156 				LLVMBuildStore(ctx->builder, value, temp_ptr);
   2157 			}
   2158 		}
   2159 		break;
   2160 	case nir_var_local:
   2161 		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
   2162 				      &const_index, &indir_index);
   2163 		for (unsigned chan = 0; chan < 4; chan++) {
   2164 			if (!(writemask & (1 << chan)))
   2165 				continue;
   2166 
   2167 			if (get_llvm_num_components(src) == 1)
   2168 				value = src;
   2169 			else
   2170 				value = LLVMBuildExtractElement(ctx->builder, src,
   2171 								LLVMConstInt(ctx->i32, chan, false), "");
   2172 			if (indir_index) {
   2173 				unsigned count = glsl_count_attribute_slots(
   2174 					instr->variables[0]->var->type, false);
   2175 				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
   2176 					&ctx->ac, ctx->locals + idx + chan, count,
   2177 					4, true);
   2178 
   2179 				tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
   2180 								 value, indir_index, "");
   2181 				build_store_values_extended(ctx, ctx->locals + idx + chan,
   2182 							    count, 4, tmp_vec);
   2183 			} else {
   2184 				temp_ptr = ctx->locals[idx + chan + const_index * 4];
   2185 
   2186 				LLVMBuildStore(ctx->builder, value, temp_ptr);
   2187 			}
   2188 		}
   2189 		break;
   2190 	case nir_var_shared: {
   2191 		LLVMValueRef ptr;
   2192 		radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
   2193 				      &const_index, &indir_index);
   2194 
   2195 		ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
   2196 		LLVMValueRef derived_ptr;
   2197 
   2198 		if (indir_index)
   2199 			indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
   2200 
   2201 		for (unsigned chan = 0; chan < 4; chan++) {
   2202 			if (!(writemask & (1 << chan)))
   2203 				continue;
   2204 
   2205 			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
   2206 
   2207 			if (get_llvm_num_components(src) == 1)
   2208 				value = src;
   2209 			else
   2210 				value = LLVMBuildExtractElement(ctx->builder, src,
   2211 								LLVMConstInt(ctx->i32,
   2212 									     chan, false),
   2213 								"");
   2214 
   2215 			if (indir_index)
   2216 				index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
   2217 
   2218 			derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
   2219 			LLVMBuildStore(ctx->builder,
   2220 				       to_integer(ctx, value), derived_ptr);
   2221 		}
   2222 		break;
   2223 	}
   2224 	default:
   2225 		break;
   2226 	}
   2227 }
   2228 
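         /* Number of coordinate components the image intrinsics expect for each
          * sampler dimensionality, e.g. a 2D array uses three (x, y, layer); for
          * multisampled images the count includes the sample index, which
          * get_image_coords() below fills in from a separate source.
          */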
   2229 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
   2230 {
   2231 	switch (dim) {
   2232 	case GLSL_SAMPLER_DIM_BUF:
   2233 		return 1;
   2234 	case GLSL_SAMPLER_DIM_1D:
   2235 		return array ? 2 : 1;
   2236 	case GLSL_SAMPLER_DIM_2D:
   2237 		return array ? 3 : 2;
   2238 	case GLSL_SAMPLER_DIM_MS:
   2239 		return array ? 4 : 3;
   2240 	case GLSL_SAMPLER_DIM_3D:
   2241 	case GLSL_SAMPLER_DIM_CUBE:
   2242 		return 3;
   2243 	case GLSL_SAMPLER_DIM_RECT:
   2244 	case GLSL_SAMPLER_DIM_SUBPASS:
   2245 		return 2;
   2246 	case GLSL_SAMPLER_DIM_SUBPASS_MS:
   2247 		return 3;
   2248 	default:
   2249 		break;
   2250 	}
   2251 	return 0;
   2252 }
   2253 
   2254 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
   2255 				     nir_intrinsic_instr *instr)
   2256 {
   2257 	const struct glsl_type *type = instr->variables[0]->var->type;
    2258 	if (instr->variables[0]->deref.child)
   2259 		type = instr->variables[0]->deref.child->type;
   2260 
   2261 	LLVMValueRef src0 = get_src(ctx, instr->src[0]);
   2262 	LLVMValueRef coords[4];
   2263 	LLVMValueRef masks[] = {
   2264 		LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
   2265 		LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
   2266 	};
   2267 	LLVMValueRef res;
   2268 	int count;
   2269 	enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
   2270 	bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
   2271 			     dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
   2272 	bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
   2273 		      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
   2274 
   2275 	count = image_type_to_components_count(dim,
   2276 					       glsl_sampler_type_is_array(type));
   2277 
   2278 	if (count == 1) {
   2279 		if (instr->src[0].ssa->num_components)
   2280 			res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
   2281 		else
   2282 			res = src0;
   2283 	} else {
   2284 		int chan;
   2285 		if (is_ms)
   2286 			count--;
   2287 		for (chan = 0; chan < count; ++chan) {
   2288 			coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
   2289 		}
   2290 
   2291 		if (add_frag_pos) {
   2292 			for (chan = 0; chan < count; ++chan)
   2293 				coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
   2294 		}
   2295 		if (is_ms) {
   2296 			coords[count] = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0);
   2297 			count++;
   2298 		}
   2299 
   2300 		if (count == 3) {
   2301 			coords[3] = LLVMGetUndef(ctx->i32);
   2302 			count = 4;
   2303 		}
   2304 		res = ac_build_gather_values(&ctx->ac, coords, count);
   2305 	}
   2306 	return res;
   2307 }
   2308 
   2309 static void build_type_name_for_intr(
   2310         LLVMTypeRef type,
   2311         char *buf, unsigned bufsize)
   2312 {
   2313         LLVMTypeRef elem_type = type;
   2314 
   2315         assert(bufsize >= 8);
   2316 
   2317         if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
   2318                 int ret = snprintf(buf, bufsize, "v%u",
   2319                                         LLVMGetVectorSize(type));
   2320                 if (ret < 0) {
   2321                         char *type_name = LLVMPrintTypeToString(type);
   2322                         fprintf(stderr, "Error building type name for: %s\n",
   2323                                 type_name);
   2324                         return;
   2325                 }
   2326                 elem_type = LLVMGetElementType(type);
   2327                 buf += ret;
   2328                 bufsize -= ret;
   2329         }
   2330         switch (LLVMGetTypeKind(elem_type)) {
   2331         default: break;
   2332         case LLVMIntegerTypeKind:
   2333                 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
   2334                 break;
   2335         case LLVMFloatTypeKind:
   2336                 snprintf(buf, bufsize, "f32");
   2337                 break;
   2338         case LLVMDoubleTypeKind:
   2339                 snprintf(buf, bufsize, "f64");
   2340                 break;
   2341         }
   2342 }
   2343 
   2344 static void get_image_intr_name(const char *base_name,
   2345                                 LLVMTypeRef data_type,
   2346                                 LLVMTypeRef coords_type,
   2347                                 LLVMTypeRef rsrc_type,
   2348                                 char *out_name, unsigned out_len)
   2349 {
   2350         char coords_type_name[8];
   2351 
   2352         build_type_name_for_intr(coords_type, coords_type_name,
   2353                             sizeof(coords_type_name));
   2354 
   2355         if (HAVE_LLVM <= 0x0309) {
   2356                 snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
   2357         } else {
   2358                 char data_type_name[8];
   2359                 char rsrc_type_name[8];
   2360 
   2361                 build_type_name_for_intr(data_type, data_type_name,
   2362                                         sizeof(data_type_name));
   2363                 build_type_name_for_intr(rsrc_type, rsrc_type_name,
   2364                                         sizeof(rsrc_type_name));
   2365                 snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
   2366                          data_type_name, coords_type_name, rsrc_type_name);
   2367         }
   2368 }
   2369 
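         /* Typed image loads: buffer images use llvm.amdgcn.buffer.load.format,
          * everything else llvm.amdgcn.image.load with the type-suffixed name
          * built by get_image_intr_name() above (e.g. something like
          * "llvm.amdgcn.image.load.v4f32.v4i32.v8i32" on newer LLVM; the exact
          * suffixes depend on the argument types, so treat this as illustrative).
          */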
   2370 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
   2371 				     nir_intrinsic_instr *instr)
   2372 {
   2373 	LLVMValueRef params[7];
   2374 	LLVMValueRef res;
   2375 	char intrinsic_name[64];
   2376 	const nir_variable *var = instr->variables[0]->var;
   2377 	const struct glsl_type *type = var->type;
    2378 	if (instr->variables[0]->deref.child)
   2379 		type = instr->variables[0]->deref.child->type;
   2380 
   2381 	type = glsl_without_array(type);
   2382 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
   2383 		params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
   2384 		params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
   2385 						    LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
   2386 		params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
   2387 		params[3] = LLVMConstInt(ctx->i1, 0, false);  /* glc */
   2388 		params[4] = LLVMConstInt(ctx->i1, 0, false);  /* slc */
   2389 		res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
   2390 					  params, 5, 0);
   2391 
   2392 		res = trim_vector(ctx, res, instr->dest.ssa.num_components);
   2393 		res = to_integer(ctx, res);
   2394 	} else {
   2395 		bool is_da = glsl_sampler_type_is_array(type) ||
   2396 			     glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
   2397 		LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
   2398 		LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
   2399 		LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
   2400 
   2401 		params[0] = get_image_coords(ctx, instr);
   2402 		params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
   2403 		params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
   2404 		if (HAVE_LLVM <= 0x0309) {
   2405 			params[3] = LLVMConstInt(ctx->i1, 0, false);  /* r128 */
   2406 			params[4] = da;
   2407 			params[5] = glc;
   2408 			params[6] = slc;
   2409 		} else {
   2410 			LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false);
   2411 			params[3] = glc;
   2412 			params[4] = slc;
   2413 			params[5] = lwe;
   2414 			params[6] = da;
   2415 		}
   2416 
   2417 		get_image_intr_name("llvm.amdgcn.image.load",
   2418 				    ctx->v4f32, /* vdata */
   2419 				    LLVMTypeOf(params[0]), /* coords */
   2420 				    LLVMTypeOf(params[1]), /* rsrc */
   2421 				    intrinsic_name, sizeof(intrinsic_name));
   2422 
   2423 		res = ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
   2424 					  params, 7, AC_FUNC_ATTR_READONLY);
   2425 	}
   2426 	return to_integer(ctx, res);
   2427 }
   2428 
   2429 static void visit_image_store(struct nir_to_llvm_context *ctx,
   2430 			      nir_intrinsic_instr *instr)
   2431 {
   2432 	LLVMValueRef params[8];
   2433 	char intrinsic_name[64];
   2434 	const nir_variable *var = instr->variables[0]->var;
   2435 	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
   2436 	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
   2437 	const struct glsl_type *type = glsl_without_array(var->type);
   2438 
   2439 	if (ctx->stage == MESA_SHADER_FRAGMENT)
   2440 		ctx->shader_info->fs.writes_memory = true;
   2441 
   2442 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
   2443 		params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */
   2444 		params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
   2445 		params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
   2446 						    LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
   2447 		params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
   2448 		params[4] = i1false;  /* glc */
   2449 		params[5] = i1false;  /* slc */
   2450 		ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
   2451 				    params, 6, 0);
   2452 	} else {
   2453 		bool is_da = glsl_sampler_type_is_array(type) ||
   2454 			     glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
   2455 		LLVMValueRef da = is_da ? i1true : i1false;
   2456 		LLVMValueRef glc = i1false;
   2457 		LLVMValueRef slc = i1false;
   2458 
   2459 		params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
   2460 		params[1] = get_image_coords(ctx, instr); /* coords */
   2461 		params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
   2462 		params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
   2463 		if (HAVE_LLVM <= 0x0309) {
   2464 			params[4] = i1false;  /* r128 */
   2465 			params[5] = da;
   2466 			params[6] = glc;
   2467 			params[7] = slc;
   2468 		} else {
   2469 			LLVMValueRef lwe = i1false;
   2470 			params[4] = glc;
   2471 			params[5] = slc;
   2472 			params[6] = lwe;
   2473 			params[7] = da;
   2474 		}
   2475 
   2476 		get_image_intr_name("llvm.amdgcn.image.store",
   2477 				    LLVMTypeOf(params[0]), /* vdata */
   2478 				    LLVMTypeOf(params[1]), /* coords */
   2479 				    LLVMTypeOf(params[2]), /* rsrc */
   2480 				    intrinsic_name, sizeof(intrinsic_name));
   2481 
   2482 		ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
   2483 				    params, 8, 0);
   2484 	}
   2485 
   2486 }
   2487 
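         /* Image atomics: operands are assembled per image type and the intrinsic
          * name is "llvm.amdgcn.image.atomic.<op>.<coords type>" as built below.
          * The returned i32 is taken to be the memory value prior to the
          * operation (the usual atomic-return convention; assumed, not stated
          * here).
          */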
   2488 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
   2489                                        nir_intrinsic_instr *instr)
   2490 {
   2491 	LLVMValueRef params[6];
   2492 	int param_count = 0;
   2493 	const nir_variable *var = instr->variables[0]->var;
   2494 	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
   2495 	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
   2496 	const char *base_name = "llvm.amdgcn.image.atomic";
   2497 	const char *atomic_name;
   2498 	LLVMValueRef coords;
   2499 	char intrinsic_name[32], coords_type[8];
   2500 	const struct glsl_type *type = glsl_without_array(var->type);
   2501 
   2502 	if (ctx->stage == MESA_SHADER_FRAGMENT)
   2503 		ctx->shader_info->fs.writes_memory = true;
   2504 
   2505 	params[param_count++] = get_src(ctx, instr->src[2]);
   2506 	if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
   2507 		params[param_count++] = get_src(ctx, instr->src[3]);
   2508 
   2509 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
   2510 		params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
   2511 		coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
   2512 									LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
   2513 		params[param_count++] = ctx->i32zero; /* voffset */
   2514 		params[param_count++] = i1false;  /* glc */
   2515 		params[param_count++] = i1false;  /* slc */
   2516 	} else {
   2517 		bool da = glsl_sampler_type_is_array(type) ||
   2518 		          glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
   2519 
   2520 		coords = params[param_count++] = get_image_coords(ctx, instr);
   2521 		params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
   2522 		params[param_count++] = i1false; /* r128 */
   2523 		params[param_count++] = da ? i1true : i1false;      /* da */
   2524 		params[param_count++] = i1false;  /* slc */
   2525 	}
   2526 
   2527 	switch (instr->intrinsic) {
   2528 	case nir_intrinsic_image_atomic_add:
   2529 		atomic_name = "add";
   2530 		break;
   2531 	case nir_intrinsic_image_atomic_min:
   2532 		atomic_name = "smin";
   2533 		break;
   2534 	case nir_intrinsic_image_atomic_max:
   2535 		atomic_name = "smax";
   2536 		break;
   2537 	case nir_intrinsic_image_atomic_and:
   2538 		atomic_name = "and";
   2539 		break;
   2540 	case nir_intrinsic_image_atomic_or:
   2541 		atomic_name = "or";
   2542 		break;
   2543 	case nir_intrinsic_image_atomic_xor:
   2544 		atomic_name = "xor";
   2545 		break;
   2546 	case nir_intrinsic_image_atomic_exchange:
   2547 		atomic_name = "swap";
   2548 		break;
   2549 	case nir_intrinsic_image_atomic_comp_swap:
   2550 		atomic_name = "cmpswap";
   2551 		break;
   2552 	default:
   2553 		abort();
   2554 	}
   2555 	build_int_type_name(LLVMTypeOf(coords),
   2556 			    coords_type, sizeof(coords_type));
   2557 
   2558 	snprintf(intrinsic_name, sizeof(intrinsic_name),
   2559 			 "%s.%s.%s", base_name, atomic_name, coords_type);
   2560 	return ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
   2561 }
   2562 
   2563 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
   2564 				     nir_intrinsic_instr *instr)
   2565 {
   2566 	LLVMValueRef res;
   2567 	LLVMValueRef params[10];
   2568 	const nir_variable *var = instr->variables[0]->var;
   2569 	const struct glsl_type *type = instr->variables[0]->var->type;
   2570 	bool da = glsl_sampler_type_is_array(var->type) ||
   2571 	          glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
    2572 	if (instr->variables[0]->deref.child)
   2573 		type = instr->variables[0]->deref.child->type;
   2574 
   2575 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
   2576 		return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
   2577 	params[0] = ctx->i32zero;
   2578 	params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
   2579 	params[2] = LLVMConstInt(ctx->i32, 15, false);
   2580 	params[3] = ctx->i32zero;
   2581 	params[4] = ctx->i32zero;
   2582 	params[5] = da ? ctx->i32one : ctx->i32zero;
   2583 	params[6] = ctx->i32zero;
   2584 	params[7] = ctx->i32zero;
   2585 	params[8] = ctx->i32zero;
   2586 	params[9] = ctx->i32zero;
   2587 
   2588 	res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
   2589 				  params, 10, AC_FUNC_ATTR_READNONE);
   2590 
   2591 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
   2592 	    glsl_sampler_type_is_array(type)) {
   2593 		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
   2594 		LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
   2595 		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
   2596 		z = LLVMBuildSDiv(ctx->builder, z, six, "");
   2597 		res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
   2598 	}
   2599 	return res;
   2600 }
   2601 
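         /* s_waitcnt with an immediate of 0xf70: per the usual GCN encoding this
          * sets vmcnt to 0 while leaving expcnt/lgkmcnt at their "don't wait"
          * maximum, i.e. it waits only on outstanding vector memory operations.
          * This is an interpretation of the constant, not stated elsewhere here.
          */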
   2602 static void emit_waitcnt(struct nir_to_llvm_context *ctx)
   2603 {
   2604 	LLVMValueRef args[1] = {
   2605 		LLVMConstInt(ctx->i32, 0xf70, false),
   2606 	};
   2607 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
   2608 			    ctx->voidt, args, 1, 0);
   2609 }
   2610 
   2611 static void emit_barrier(struct nir_to_llvm_context *ctx)
   2612 {
    2613 	/* TODO: tessellation shaders */
   2614 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
   2615 			    ctx->voidt, NULL, 0, 0);
   2616 }
   2617 
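         /* discard_if: the i32 condition is mapped to -1.0 (discard) or 0.0 (keep)
          * and passed to llvm.AMDGPU.kill, which is expected to kill the fragment
          * when its operand is negative (assumed kill semantics).
          */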
   2618 static void emit_discard_if(struct nir_to_llvm_context *ctx,
   2619 			    nir_intrinsic_instr *instr)
   2620 {
   2621 	LLVMValueRef cond;
   2622 	ctx->shader_info->fs.can_discard = true;
   2623 
   2624 	cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
   2625 			     get_src(ctx, instr->src[0]),
   2626 			     ctx->i32zero, "");
   2627 
   2628 	cond = LLVMBuildSelect(ctx->builder, cond,
   2629 			       LLVMConstReal(ctx->f32, -1.0f),
   2630 			       ctx->f32zero, "");
   2631 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
   2632 			       ctx->voidt,
   2633 			       &cond, 1, 0);
   2634 }
   2635 
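         /* local_invocation_index = wave-id-within-group * 64 + lane.  The 0xfc0
          * mask reads bits [11:6] of the tg_size SGPR as the wave id already
          * scaled by the 64-lane wave size, and get_thread_id() supplies the
          * lane; this is an interpretation of the masking, not documented here.
          */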
   2636 static LLVMValueRef
   2637 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
   2638 {
   2639 	LLVMValueRef result;
   2640 	LLVMValueRef thread_id = get_thread_id(ctx);
   2641 	result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
   2642 			      LLVMConstInt(ctx->i32, 0xfc0, false), "");
   2643 
   2644 	return LLVMBuildAdd(ctx->builder, result, thread_id, "");
   2645 }
   2646 
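         /* Shared-variable atomics lower to an LLVM atomicrmw (or cmpxchg for
          * comp_swap) on the LDS pointer, using sequentially consistent ordering
          * throughout.
          */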
   2647 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
   2648 				     nir_intrinsic_instr *instr)
   2649 {
   2650 	LLVMValueRef ptr, result;
   2651 	int idx = instr->variables[0]->var->data.driver_location;
   2652 	LLVMValueRef src = get_src(ctx, instr->src[0]);
   2653 	ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
   2654 
   2655 	if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
   2656 		LLVMValueRef src1 = get_src(ctx, instr->src[1]);
   2657 		result = LLVMBuildAtomicCmpXchg(ctx->builder,
   2658 						ptr, src, src1,
   2659 						LLVMAtomicOrderingSequentiallyConsistent,
   2660 						LLVMAtomicOrderingSequentiallyConsistent,
   2661 						false);
   2662 	} else {
   2663 		LLVMAtomicRMWBinOp op;
   2664 		switch (instr->intrinsic) {
   2665 		case nir_intrinsic_var_atomic_add:
   2666 			op = LLVMAtomicRMWBinOpAdd;
   2667 			break;
   2668 		case nir_intrinsic_var_atomic_umin:
   2669 			op = LLVMAtomicRMWBinOpUMin;
   2670 			break;
   2671 		case nir_intrinsic_var_atomic_umax:
   2672 			op = LLVMAtomicRMWBinOpUMax;
   2673 			break;
   2674 		case nir_intrinsic_var_atomic_imin:
   2675 			op = LLVMAtomicRMWBinOpMin;
   2676 			break;
   2677 		case nir_intrinsic_var_atomic_imax:
   2678 			op = LLVMAtomicRMWBinOpMax;
   2679 			break;
   2680 		case nir_intrinsic_var_atomic_and:
   2681 			op = LLVMAtomicRMWBinOpAnd;
   2682 			break;
   2683 		case nir_intrinsic_var_atomic_or:
   2684 			op = LLVMAtomicRMWBinOpOr;
   2685 			break;
   2686 		case nir_intrinsic_var_atomic_xor:
   2687 			op = LLVMAtomicRMWBinOpXor;
   2688 			break;
   2689 		case nir_intrinsic_var_atomic_exchange:
   2690 			op = LLVMAtomicRMWBinOpXchg;
   2691 			break;
   2692 		default:
   2693 			return NULL;
   2694 		}
   2695 
   2696 		result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src),
   2697 					    LLVMAtomicOrderingSequentiallyConsistent,
   2698 					    false);
   2699 	}
   2700 	return result;
   2701 }
   2702 
   2703 #define INTERP_CENTER 0
   2704 #define INTERP_CENTROID 1
   2705 #define INTERP_SAMPLE 2
   2706 
   2707 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
   2708 					enum glsl_interp_mode interp, unsigned location)
   2709 {
   2710 	switch (interp) {
   2711 	case INTERP_MODE_FLAT:
   2712 	default:
   2713 		return NULL;
   2714 	case INTERP_MODE_SMOOTH:
   2715 	case INTERP_MODE_NONE:
   2716 		if (location == INTERP_CENTER)
   2717 			return ctx->persp_center;
   2718 		else if (location == INTERP_CENTROID)
   2719 			return ctx->persp_centroid;
   2720 		else if (location == INTERP_SAMPLE)
   2721 			return ctx->persp_sample;
   2722 		break;
   2723 	case INTERP_MODE_NOPERSPECTIVE:
   2724 		if (location == INTERP_CENTER)
   2725 			return ctx->linear_center;
   2726 		else if (location == INTERP_CENTROID)
   2727 			return ctx->linear_centroid;
   2728 		else if (location == INTERP_SAMPLE)
   2729 			return ctx->linear_sample;
   2730 		break;
   2731 	}
   2732 	return NULL;
   2733 }
   2734 
   2735 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
   2736 					 LLVMValueRef sample_id)
   2737 {
    2738 	/* offset = sample_id * 8 bytes (two f32s holding samplepos.xy); the .y component is loaded at +4 */
   2739 	LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
   2740 	LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
   2741 	LLVMValueRef result[2];
   2742 
   2743 	result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0);
   2744 	result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1);
   2745 
   2746 	return ac_build_gather_values(&ctx->ac, result, 2);
   2747 }
   2748 
   2749 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
   2750 {
   2751 	LLVMValueRef values[2];
   2752 
   2753 	values[0] = emit_ffract(ctx, ctx->frag_pos[0]);
   2754 	values[1] = emit_ffract(ctx, ctx->frag_pos[1]);
   2755 	return ac_build_gather_values(&ctx->ac, values, 2);
   2756 }
   2757 
   2758 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
   2759 				 nir_intrinsic_instr *instr)
   2760 {
   2761 	LLVMValueRef result[2];
   2762 	LLVMValueRef interp_param, attr_number;
   2763 	unsigned location;
   2764 	unsigned chan;
   2765 	LLVMValueRef src_c0, src_c1;
   2766 	const char *intr_name;
   2767 	LLVMValueRef src0;
   2768 	int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
   2769 	switch (instr->intrinsic) {
   2770 	case nir_intrinsic_interp_var_at_centroid:
   2771 		location = INTERP_CENTROID;
   2772 		break;
   2773 	case nir_intrinsic_interp_var_at_sample:
   2774 	case nir_intrinsic_interp_var_at_offset:
   2775 		location = INTERP_SAMPLE;
   2776 		src0 = get_src(ctx, instr->src[0]);
   2777 		break;
   2778 	default:
   2779 		break;
   2780 	}
   2781 
   2782 	if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
   2783 		src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
   2784 		src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
   2785 	} else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
   2786 		LLVMValueRef sample_position;
   2787 		LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
   2788 
   2789 		/* fetch sample ID */
   2790 		sample_position = load_sample_position(ctx, src0);
   2791 
   2792 		src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
   2793 		src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
   2794 		src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
   2795 		src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
   2796 	}
   2797 	interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
   2798 	attr_number = LLVMConstInt(ctx->i32, input_index, false);
   2799 
   2800 	if (location == INTERP_SAMPLE) {
   2801 		LLVMValueRef ij_out[2];
   2802 		LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
   2803 
   2804 		/*
   2805 		 * take the I then J parameters, and the DDX/Y for it, and
   2806 		 * calculate the IJ inputs for the interpolator.
   2807 		 * temp1 = ddx * offset/sample.x + I;
   2808 		 * interp_param.I = ddy * offset/sample.y + temp1;
   2809 		 * temp1 = ddx * offset/sample.x + J;
   2810 		 * interp_param.J = ddy * offset/sample.y + temp1;
   2811 		 */
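         		/* ddxy_out is read as { ddx(i), ddx(j), ddy(i), ddy(j) }, which is
         		 * why the loop below indexes it at i and i + 2.
         		 */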
   2812 		for (unsigned i = 0; i < 2; i++) {
   2813 			LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
   2814 			LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
   2815 			LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
   2816 								      ddxy_out, ix_ll, "");
   2817 			LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
   2818 								      ddxy_out, iy_ll, "");
   2819 			LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
   2820 									 interp_param, ix_ll, "");
   2821 			LLVMValueRef temp1, temp2;
   2822 
   2823 			interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
   2824 						     ctx->f32, "");
   2825 
   2826 			temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
   2827 			temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
   2828 
   2829 			temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
   2830 			temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
   2831 
   2832 			ij_out[i] = LLVMBuildBitCast(ctx->builder,
   2833 						     temp2, ctx->i32, "");
   2834 		}
   2835 		interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
   2836 
   2837 	}
   2838 	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
   2839 	for (chan = 0; chan < 2; chan++) {
   2840 		LLVMValueRef args[4];
   2841 		LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
   2842 
   2843 		args[0] = llvm_chan;
   2844 		args[1] = attr_number;
   2845 		args[2] = ctx->prim_mask;
   2846 		args[3] = interp_param;
   2847 		result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
   2848 						   ctx->f32, args, args[3] ? 4 : 3,
   2849 						   AC_FUNC_ATTR_READNONE);
   2850 	}
   2851 	return ac_build_gather_values(&ctx->ac, result, 2);
   2852 }
   2853 
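        /* Translate a single NIR intrinsic and, if it produces a value, record it
         * in the SSA def table so later instructions can find it via get_src(). */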
   2854 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
   2855                             nir_intrinsic_instr *instr)
   2856 {
   2857 	LLVMValueRef result = NULL;
   2858 
   2859 	switch (instr->intrinsic) {
   2860 	case nir_intrinsic_load_work_group_id: {
   2861 		result = ctx->workgroup_ids;
   2862 		break;
   2863 	}
   2864 	case nir_intrinsic_load_base_vertex: {
   2865 		result = ctx->base_vertex;
   2866 		break;
   2867 	}
   2868 	case nir_intrinsic_load_vertex_id_zero_base: {
   2869 		result = ctx->vertex_id;
   2870 		break;
   2871 	}
   2872 	case nir_intrinsic_load_local_invocation_id: {
   2873 		result = ctx->local_invocation_ids;
   2874 		break;
   2875 	}
   2876 	case nir_intrinsic_load_base_instance:
   2877 		result = ctx->start_instance;
   2878 		break;
   2879 	case nir_intrinsic_load_sample_id:
   2880 		ctx->shader_info->fs.force_persample = true;
   2881 		result = unpack_param(ctx, ctx->ancillary, 8, 4);
   2882 		break;
   2883 	case nir_intrinsic_load_sample_pos:
   2884 		ctx->shader_info->fs.force_persample = true;
   2885 		result = load_sample_pos(ctx);
   2886 		break;
   2887 	case nir_intrinsic_load_front_face:
   2888 		result = ctx->front_face;
   2889 		break;
   2890 	case nir_intrinsic_load_instance_id:
   2891 		result = ctx->instance_id;
   2892 		ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
   2893 		                            ctx->shader_info->vs.vgpr_comp_cnt);
   2894 		break;
   2895 	case nir_intrinsic_load_num_work_groups:
   2896 		result = ctx->num_work_groups;
   2897 		break;
   2898 	case nir_intrinsic_load_local_invocation_index:
   2899 		result = visit_load_local_invocation_index(ctx);
   2900 		break;
   2901 	case nir_intrinsic_load_push_constant:
   2902 		result = visit_load_push_constant(ctx, instr);
   2903 		break;
   2904 	case nir_intrinsic_vulkan_resource_index:
   2905 		result = visit_vulkan_resource_index(ctx, instr);
   2906 		break;
   2907 	case nir_intrinsic_store_ssbo:
   2908 		visit_store_ssbo(ctx, instr);
   2909 		break;
   2910 	case nir_intrinsic_load_ssbo:
   2911 		result = visit_load_buffer(ctx, instr);
   2912 		break;
   2913 	case nir_intrinsic_ssbo_atomic_add:
   2914 	case nir_intrinsic_ssbo_atomic_imin:
   2915 	case nir_intrinsic_ssbo_atomic_umin:
   2916 	case nir_intrinsic_ssbo_atomic_imax:
   2917 	case nir_intrinsic_ssbo_atomic_umax:
   2918 	case nir_intrinsic_ssbo_atomic_and:
   2919 	case nir_intrinsic_ssbo_atomic_or:
   2920 	case nir_intrinsic_ssbo_atomic_xor:
   2921 	case nir_intrinsic_ssbo_atomic_exchange:
   2922 	case nir_intrinsic_ssbo_atomic_comp_swap:
   2923 		result = visit_atomic_ssbo(ctx, instr);
   2924 		break;
   2925 	case nir_intrinsic_load_ubo:
   2926 		result = visit_load_ubo_buffer(ctx, instr);
   2927 		break;
   2928 	case nir_intrinsic_get_buffer_size:
   2929 		result = visit_get_buffer_size(ctx, instr);
   2930 		break;
   2931 	case nir_intrinsic_load_var:
   2932 		result = visit_load_var(ctx, instr);
   2933 		break;
   2934 	case nir_intrinsic_store_var:
   2935 		visit_store_var(ctx, instr);
   2936 		break;
   2937 	case nir_intrinsic_image_load:
   2938 		result = visit_image_load(ctx, instr);
   2939 		break;
   2940 	case nir_intrinsic_image_store:
   2941 		visit_image_store(ctx, instr);
   2942 		break;
   2943 	case nir_intrinsic_image_atomic_add:
   2944 	case nir_intrinsic_image_atomic_min:
   2945 	case nir_intrinsic_image_atomic_max:
   2946 	case nir_intrinsic_image_atomic_and:
   2947 	case nir_intrinsic_image_atomic_or:
   2948 	case nir_intrinsic_image_atomic_xor:
   2949 	case nir_intrinsic_image_atomic_exchange:
   2950 	case nir_intrinsic_image_atomic_comp_swap:
   2951 		result = visit_image_atomic(ctx, instr);
   2952 		break;
   2953 	case nir_intrinsic_image_size:
   2954 		result = visit_image_size(ctx, instr);
   2955 		break;
   2956 	case nir_intrinsic_discard:
   2957 		ctx->shader_info->fs.can_discard = true;
   2958 		ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
   2959 				       ctx->voidt,
   2960 				       NULL, 0, 0);
   2961 		break;
   2962 	case nir_intrinsic_discard_if:
   2963 		emit_discard_if(ctx, instr);
   2964 		break;
   2965 	case nir_intrinsic_memory_barrier:
   2966 		emit_waitcnt(ctx);
   2967 		break;
   2968 	case nir_intrinsic_barrier:
   2969 		emit_barrier(ctx);
   2970 		break;
   2971 	case nir_intrinsic_var_atomic_add:
   2972 	case nir_intrinsic_var_atomic_imin:
   2973 	case nir_intrinsic_var_atomic_umin:
   2974 	case nir_intrinsic_var_atomic_imax:
   2975 	case nir_intrinsic_var_atomic_umax:
   2976 	case nir_intrinsic_var_atomic_and:
   2977 	case nir_intrinsic_var_atomic_or:
   2978 	case nir_intrinsic_var_atomic_xor:
   2979 	case nir_intrinsic_var_atomic_exchange:
   2980 	case nir_intrinsic_var_atomic_comp_swap:
   2981 		result = visit_var_atomic(ctx, instr);
   2982 		break;
   2983 	case nir_intrinsic_interp_var_at_centroid:
   2984 	case nir_intrinsic_interp_var_at_sample:
   2985 	case nir_intrinsic_interp_var_at_offset:
   2986 		result = visit_interp(ctx, instr);
   2987 		break;
   2988 	default:
   2989 		fprintf(stderr, "Unknown intrinsic: ");
   2990 		nir_print_instr(&instr->instr, stderr);
   2991 		fprintf(stderr, "\n");
   2992 		break;
   2993 	}
   2994 	if (result) {
   2995 		_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
   2996 	}
   2997 }
   2998 
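        /* Load an image/fmask/sampler/buffer descriptor from the bound descriptor
         * set: start at the binding's byte offset (fmask and sampler words follow
         * the image words of a combined descriptor), add the array index scaled by
         * the binding stride, and load a v4i32 or v8i32 from the set pointer. */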
   2999 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
   3000 					  nir_deref_var *deref,
   3001 					  enum desc_type desc_type)
   3002 {
   3003 	unsigned desc_set = deref->var->data.descriptor_set;
   3004 	LLVMValueRef list = ctx->descriptor_sets[desc_set];
   3005 	struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
   3006 	struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
   3007 	unsigned offset = binding->offset;
   3008 	unsigned stride = binding->size;
   3009 	unsigned type_size;
   3010 	LLVMBuilderRef builder = ctx->builder;
   3011 	LLVMTypeRef type;
   3012 	LLVMValueRef index = NULL;
   3013 
   3014 	assert(deref->var->data.binding < layout->binding_count);
   3015 
   3016 	switch (desc_type) {
   3017 	case DESC_IMAGE:
   3018 		type = ctx->v8i32;
   3019 		type_size = 32;
   3020 		break;
   3021 	case DESC_FMASK:
   3022 		type = ctx->v8i32;
   3023 		offset += 32;
   3024 		type_size = 32;
   3025 		break;
   3026 	case DESC_SAMPLER:
   3027 		type = ctx->v4i32;
   3028 		if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
   3029 			offset += 64;
   3030 
   3031 		type_size = 16;
   3032 		break;
   3033 	case DESC_BUFFER:
   3034 		type = ctx->v4i32;
   3035 		type_size = 16;
   3036 		break;
   3037 	default:
   3038 		unreachable("invalid desc_type\n");
   3039 	}
   3040 
   3041 	if (deref->deref.child) {
   3042 		nir_deref_array *child = (nir_deref_array*)deref->deref.child;
   3043 
   3044 		assert(child->deref_array_type != nir_deref_array_type_wildcard);
   3045 		offset += child->base_offset * stride;
   3046 		if (child->deref_array_type == nir_deref_array_type_indirect) {
   3047 			index = get_src(ctx, child->indirect);
   3048 		}
   3049 	}
   3050 
   3051 	assert(stride % type_size == 0);
   3052 
   3053 	if (!index)
   3054 		index = ctx->i32zero;
   3055 
   3056 	index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
   3057 
   3058 	list = build_gep0(ctx, list, LLVMConstInt(ctx->i32, offset, 0));
   3059 	list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
   3060 
   3061 	return build_indexed_load_const(ctx, list, index);
   3062 }
   3063 
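        /* Assemble the argument list for the texture intrinsic: the packed address
         * vector (padded to a power-of-two length), the resource and sampler
         * descriptors, then dmask and the unorm/r128/da/glc/slc/tfe/lwe flags.
         * Buffer txf is special-cased to the simpler buffer-load form. */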
   3064 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
   3065 			       struct ac_tex_info *tinfo,
   3066 			       nir_tex_instr *instr,
   3067 			       nir_texop op,
   3068 			       LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
   3069 			       LLVMValueRef *param, unsigned count,
   3070 			       unsigned dmask)
   3071 {
   3072 	int num_args;
   3073 	unsigned is_rect = 0;
   3074 	bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
   3075 
   3076 	if (op == nir_texop_lod)
   3077 		da = false;
   3078 	/* Pad the address vector to a power-of-two length */
   3079 	while (count < util_next_power_of_two(count))
   3080 		param[count++] = LLVMGetUndef(ctx->i32);
   3081 
   3082 	if (count > 1)
   3083 		tinfo->args[0] = ac_build_gather_values(&ctx->ac, param, count);
   3084 	else
   3085 		tinfo->args[0] = param[0];
   3086 
   3087 	tinfo->args[1] = res_ptr;
   3088 	num_args = 2;
   3089 
   3090 	if (op == nir_texop_txf ||
   3091 	    op == nir_texop_txf_ms ||
   3092 	    op == nir_texop_query_levels ||
   3093 	    op == nir_texop_texture_samples ||
   3094 	    op == nir_texop_txs)
   3095 		tinfo->dst_type = ctx->v4i32;
   3096 	else {
   3097 		tinfo->dst_type = ctx->v4f32;
   3098 		tinfo->args[num_args++] = samp_ptr;
   3099 	}
   3100 
   3101 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
   3102 		tinfo->args[0] = res_ptr;
   3103 		tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false);
   3104 		tinfo->args[2] = param[0];
   3105 		tinfo->arg_count = 3;
   3106 		return;
   3107 	}
   3108 
   3109 	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0);
   3110 	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */
   3111 	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
   3112 	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
   3113 	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
   3114 	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
   3115 	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
   3116 	tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
   3117 
   3118 	tinfo->arg_count = num_args;
   3119 }
   3120 
   3121 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
   3122  *
   3123  * SI-CI:
   3124  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
   3125  *   filtering manually. The driver sets img7 to a mask clearing
   3126  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
   3127  *     s_and_b32 samp0, samp0, img7
   3128  *
   3129  * VI:
   3130  *   The ANISO_OVERRIDE sampler field enables this fix in TA.
   3131  */
   3132 static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
   3133                                            LLVMValueRef res, LLVMValueRef samp)
   3134 {
   3135 	LLVMBuilderRef builder = ctx->builder;
   3136 	LLVMValueRef img7, samp0;
   3137 
   3138 	if (ctx->options->chip_class >= VI)
   3139 		return samp;
   3140 
   3141 	img7 = LLVMBuildExtractElement(builder, res,
   3142 	                               LLVMConstInt(ctx->i32, 7, 0), "");
   3143 	samp0 = LLVMBuildExtractElement(builder, samp,
   3144 	                                LLVMConstInt(ctx->i32, 0, 0), "");
   3145 	samp0 = LLVMBuildAnd(builder, samp0, img7, "");
   3146 	return LLVMBuildInsertElement(builder, samp, samp0,
   3147 	                              LLVMConstInt(ctx->i32, 0, 0), "");
   3148 }
   3149 
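        /* Fetch the resource and sampler descriptors for a texture instruction,
         * plus the FMASK descriptor for multisample fetches, and apply the SI/CI
         * anisotropic-filtering workaround to the sampler. */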
   3150 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
   3151 			   nir_tex_instr *instr,
   3152 			   LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
   3153 			   LLVMValueRef *fmask_ptr)
   3154 {
   3155 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
   3156 		*res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
   3157 	else
   3158 		*res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
   3159 	if (samp_ptr) {
   3160 		if (instr->sampler)
   3161 			*samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
   3162 		else
   3163 			*samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
   3164 		if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
   3165 			*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
   3166 	}
   3167 	if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
   3168 					     instr->op == nir_texop_samples_identical))
   3169 		*fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
   3170 }
   3171 
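        /* Array layers are selected with round-to-nearest (llvm.rint) rather than
         * truncation, so round the slice coordinate before packing it. */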
   3172 static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx,
   3173 				      LLVMValueRef coord)
   3174 {
   3175 	coord = to_float(ctx, coord);
   3176 	coord = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
   3177 	coord = to_integer(ctx, coord);
   3178 	return coord;
   3179 }
   3180 
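        /* Translate a NIR texture instruction: gather its sources, pack offsets,
         * bias, comparator, derivatives, coordinates and LOD into the address
         * vector, remap the sample index through FMASK for multisampled images,
         * then emit the image intrinsic and fix up the result (query_levels,
         * shadow compares, cube-array txs, component trimming). */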
   3181 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
   3182 {
   3183 	LLVMValueRef result = NULL;
   3184 	struct ac_tex_info tinfo = { 0 };
   3185 	unsigned dmask = 0xf;
   3186 	LLVMValueRef address[16];
   3187 	LLVMValueRef coords[5];
   3188 	LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
   3189 	LLVMValueRef bias = NULL, offsets = NULL;
   3190 	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
   3191 	LLVMValueRef ddx = NULL, ddy = NULL;
   3192 	LLVMValueRef derivs[6];
   3193 	unsigned chan, count = 0;
   3194 	unsigned const_src = 0, num_deriv_comp = 0;
   3195 
   3196 	tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
   3197 
   3198 	for (unsigned i = 0; i < instr->num_srcs; i++) {
   3199 		switch (instr->src[i].src_type) {
   3200 		case nir_tex_src_coord:
   3201 			coord = get_src(ctx, instr->src[i].src);
   3202 			break;
   3203 		case nir_tex_src_projector:
   3204 			break;
   3205 		case nir_tex_src_comparator:
   3206 			comparator = get_src(ctx, instr->src[i].src);
   3207 			break;
   3208 		case nir_tex_src_offset:
   3209 			offsets = get_src(ctx, instr->src[i].src);
   3210 			const_src = i;
   3211 			break;
   3212 		case nir_tex_src_bias:
   3213 			bias = get_src(ctx, instr->src[i].src);
   3214 			break;
   3215 		case nir_tex_src_lod:
   3216 			lod = get_src(ctx, instr->src[i].src);
   3217 			break;
   3218 		case nir_tex_src_ms_index:
   3219 			sample_index = get_src(ctx, instr->src[i].src);
   3220 			break;
   3221 		case nir_tex_src_ms_mcs:
   3222 			break;
   3223 		case nir_tex_src_ddx:
   3224 			ddx = get_src(ctx, instr->src[i].src);
   3225 			num_deriv_comp = instr->src[i].src.ssa->num_components;
   3226 			break;
   3227 		case nir_tex_src_ddy:
   3228 			ddy = get_src(ctx, instr->src[i].src);
   3229 			break;
   3230 		case nir_tex_src_texture_offset:
   3231 		case nir_tex_src_sampler_offset:
   3232 		case nir_tex_src_plane:
   3233 		default:
   3234 			break;
   3235 		}
   3236 	}
   3237 
   3238 	if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
   3239 		result = get_buffer_size(ctx, res_ptr, true);
   3240 		goto write_result;
   3241 	}
   3242 
   3243 	if (instr->op == nir_texop_texture_samples) {
   3244 		LLVMValueRef res, samples, is_msaa;
   3245 		res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
   3246 		samples = LLVMBuildExtractElement(ctx->builder, res,
   3247 						  LLVMConstInt(ctx->i32, 3, false), "");
   3248 		is_msaa = LLVMBuildLShr(ctx->builder, samples,
   3249 					LLVMConstInt(ctx->i32, 28, false), "");
   3250 		is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
   3251 				       LLVMConstInt(ctx->i32, 0xe, false), "");
   3252 		is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
   3253 					LLVMConstInt(ctx->i32, 0xe, false), "");
   3254 
   3255 		samples = LLVMBuildLShr(ctx->builder, samples,
   3256 					LLVMConstInt(ctx->i32, 16, false), "");
   3257 		samples = LLVMBuildAnd(ctx->builder, samples,
   3258 				       LLVMConstInt(ctx->i32, 0xf, false), "");
   3259 		samples = LLVMBuildShl(ctx->builder, ctx->i32one,
   3260 				       samples, "");
   3261 		samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
   3262 					  ctx->i32one, "");
   3263 		result = samples;
   3264 		goto write_result;
   3265 	}
   3266 
   3267 	if (coord)
   3268 		for (chan = 0; chan < instr->coord_components; chan++)
   3269 			coords[chan] = llvm_extract_elem(ctx, coord, chan);
   3270 
   3271 	if (offsets && instr->op != nir_texop_txf) {
   3272 		LLVMValueRef offset[3], pack;
   3273 		for (chan = 0; chan < 3; ++chan)
   3274 			offset[chan] = ctx->i32zero;
   3275 
   3276 		tinfo.has_offset = true;
   3277 		for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
   3278 			offset[chan] = llvm_extract_elem(ctx, offsets, chan);
   3279 			offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
   3280 						    LLVMConstInt(ctx->i32, 0x3f, false), "");
   3281 			if (chan)
   3282 				offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
   3283 							    LLVMConstInt(ctx->i32, chan * 8, false), "");
   3284 		}
   3285 		pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
   3286 		pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
   3287 		address[count++] = pack;
   3288 
   3289 	}
   3290 	/* Pack LOD bias value */
   3291 	if (instr->op == nir_texop_txb && bias) {
   3292 		address[count++] = bias;
   3293 	}
   3294 
   3295 	/* Pack depth comparison value */
   3296 	if (instr->is_shadow && comparator) {
   3297 		address[count++] = llvm_extract_elem(ctx, comparator, 0);
   3298 	}
   3299 
   3300 	/* Pack derivatives */
   3301 	if (ddx || ddy) {
   3302 		switch (instr->sampler_dim) {
   3303 		case GLSL_SAMPLER_DIM_3D:
   3304 		case GLSL_SAMPLER_DIM_CUBE:
   3305 			num_deriv_comp = 3;
   3306 			break;
   3307 		case GLSL_SAMPLER_DIM_2D:
   3308 		default:
   3309 			num_deriv_comp = 2;
   3310 			break;
   3311 		case GLSL_SAMPLER_DIM_1D:
   3312 			num_deriv_comp = 1;
   3313 			break;
   3314 		}
   3315 
   3316 		for (unsigned i = 0; i < num_deriv_comp; i++) {
   3317 			derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i));
   3318 			derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i));
   3319 		}
   3320 	}
   3321 
   3322 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
   3323 		for (chan = 0; chan < instr->coord_components; chan++)
   3324 			coords[chan] = to_float(ctx, coords[chan]);
   3325 		if (instr->coord_components == 3)
   3326 			coords[3] = LLVMGetUndef(ctx->f32);
   3327 		ac_prepare_cube_coords(&ctx->ac,
   3328 			instr->op == nir_texop_txd, instr->is_array,
   3329 			coords, derivs);
   3330 		if (num_deriv_comp)
   3331 			num_deriv_comp--;
   3332 	}
   3333 
   3334 	if (ddx || ddy) {
   3335 		for (unsigned i = 0; i < num_deriv_comp * 2; i++)
   3336 			address[count++] = derivs[i];
   3337 	}
   3338 
   3339 	/* Pack texture coordinates */
   3340 	if (coord) {
   3341 		address[count++] = coords[0];
   3342 		if (instr->coord_components > 1) {
   3343 			if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
   3344 				coords[1] = apply_round_slice(ctx, coords[1]);
   3345 			}
   3346 			address[count++] = coords[1];
   3347 		}
   3348 		if (instr->coord_components > 2) {
   3349 			/* This seems like a bit of a hack - but it passes Vulkan CTS with it */
   3350 			if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
   3351 				coords[2] = apply_round_slice(ctx, coords[2]);
   3352 			}
   3353 			address[count++] = coords[2];
   3354 		}
   3355 	}
   3356 
   3357 	/* Pack LOD */
   3358 	if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) {
   3359 		address[count++] = lod;
   3360 	} else if (instr->op == nir_texop_txf_ms && sample_index) {
   3361 		address[count++] = sample_index;
   3362 	} else if (instr->op == nir_texop_txs) {
   3363 		count = 0;
   3364 		if (lod)
   3365 			address[count++] = lod;
   3366 		else
   3367 			address[count++] = ctx->i32zero;
   3368 	}
   3369 
   3370 	for (chan = 0; chan < count; chan++) {
   3371 		address[chan] = LLVMBuildBitCast(ctx->builder,
   3372 						 address[chan], ctx->i32, "");
   3373 	}
   3374 
   3375 	if (instr->op == nir_texop_samples_identical) {
   3376 		LLVMValueRef txf_address[4];
   3377 		struct ac_tex_info txf_info = { 0 };
   3378 		unsigned txf_count = count;
   3379 		memcpy(txf_address, address, sizeof(txf_address));
   3380 
   3381 		if (!instr->is_array)
   3382 			txf_address[2] = ctx->i32zero;
   3383 		txf_address[3] = ctx->i32zero;
   3384 
   3385 		set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
   3386 				   fmask_ptr, NULL,
   3387 				   txf_address, txf_count, 0xf);
   3388 
   3389 		result = build_tex_intrinsic(ctx, instr, &txf_info);
   3390 
   3391 		result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
   3392 		result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
   3393 		goto write_result;
   3394 	}
   3395 
   3396 	/* Adjust the sample index according to FMASK.
   3397 	 *
   3398 	 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
   3399 	 * which is the identity mapping. Each nibble says which physical sample
   3400 	 * should be fetched to get that sample.
   3401 	 *
   3402 	 * For example, 0x11111100 means there are only 2 samples stored and
   3403 	 * the second sample covers 3/4 of the pixel. When reading samples 0
   3404 	 * and 1, return physical sample 0 (determined by the first two 0s
   3405 	 * in FMASK), otherwise return physical sample 1.
   3406 	 *
   3407 	 * The sample index should be adjusted as follows:
   3408 	 *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
   3409 	 */
   3410 	if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
   3411 	    instr->op != nir_texop_txs) {
   3412 		LLVMValueRef txf_address[4];
   3413 		struct ac_tex_info txf_info = { 0 };
   3414 		unsigned txf_count = count;
   3415 		memcpy(txf_address, address, sizeof(txf_address));
   3416 
   3417 		if (!instr->is_array)
   3418 			txf_address[2] = ctx->i32zero;
   3419 		txf_address[3] = ctx->i32zero;
   3420 
   3421 		set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
   3422 				   fmask_ptr, NULL,
   3423 				   txf_address, txf_count, 0xf);
   3424 
   3425 		result = build_tex_intrinsic(ctx, instr, &txf_info);
   3426 		LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
   3427 		LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
   3428 
   3429 		LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
   3430 							     result,
   3431 							     ctx->i32zero, "");
   3432 
   3433 		unsigned sample_chan = instr->is_array ? 3 : 2;
   3434 
   3435 		LLVMValueRef sample_index4 =
   3436 			LLVMBuildMul(ctx->builder, address[sample_chan], four, "");
   3437 		LLVMValueRef shifted_fmask =
   3438 			LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
   3439 		LLVMValueRef final_sample =
   3440 			LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
   3441 
   3442 		/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
   3443 		 * resource descriptor is 0 (invalid).
   3444 		 */
   3445 		LLVMValueRef fmask_desc =
   3446 			LLVMBuildBitCast(ctx->builder, fmask_ptr,
   3447 					 ctx->v8i32, "");
   3448 
   3449 		LLVMValueRef fmask_word1 =
   3450 			LLVMBuildExtractElement(ctx->builder, fmask_desc,
   3451 						ctx->i32one, "");
   3452 
   3453 		LLVMValueRef word1_is_nonzero =
   3454 			LLVMBuildICmp(ctx->builder, LLVMIntNE,
   3455 				      fmask_word1, ctx->i32zero, "");
   3456 
   3457 		/* Replace the MSAA sample index. */
   3458 		address[sample_chan] =
   3459 			LLVMBuildSelect(ctx->builder, word1_is_nonzero,
   3460 					final_sample, address[sample_chan], "");
   3461 	}
   3462 
   3463 	if (offsets && instr->op == nir_texop_txf) {
   3464 		nir_const_value *const_offset =
   3465 			nir_src_as_const_value(instr->src[const_src].src);
   3466 		int num_offsets = instr->src[const_src].src.ssa->num_components;
   3467 		assert(const_offset);
   3468 		num_offsets = MIN2(num_offsets, instr->coord_components);
   3469 		if (num_offsets > 2)
   3470 			address[2] = LLVMBuildAdd(ctx->builder,
   3471 						  address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
   3472 		if (num_offsets > 1)
   3473 			address[1] = LLVMBuildAdd(ctx->builder,
   3474 						  address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
   3475 		address[0] = LLVMBuildAdd(ctx->builder,
   3476 					  address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
   3477 
   3478 	}
   3479 
   3480 	/* TODO TG4 support */
   3481 	if (instr->op == nir_texop_tg4) {
   3482 		if (instr->is_shadow)
   3483 			dmask = 1;
   3484 		else
   3485 			dmask = 1 << instr->component;
   3486 	}
   3487 	set_tex_fetch_args(ctx, &tinfo, instr, instr->op,
   3488 			   res_ptr, samp_ptr, address, count, dmask);
   3489 
   3490 	result = build_tex_intrinsic(ctx, instr, &tinfo);
   3491 
   3492 	if (instr->op == nir_texop_query_levels)
   3493 		result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
   3494 	else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
   3495 		result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
   3496 	else if (instr->op == nir_texop_txs &&
   3497 		 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
   3498 		 instr->is_array) {
   3499 		LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
   3500 		LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
   3501 		LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
   3502 		z = LLVMBuildSDiv(ctx->builder, z, six, "");
   3503 		result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
   3504 	} else if (instr->dest.ssa.num_components != 4)
   3505 		result = trim_vector(ctx, result, instr->dest.ssa.num_components);
   3506 
   3507 write_result:
   3508 	if (result) {
   3509 		assert(instr->dest.is_ssa);
   3510 		result = to_integer(ctx, result);
   3511 		_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
   3512 	}
   3513 }
   3514 
   3515 
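        /* Phis are created empty here; their incoming values are filled in by
         * phi_post_pass() once every predecessor block has been emitted. */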
   3516 static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
   3517 {
   3518 	LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
   3519 	LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
   3520 
   3521 	_mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
   3522 	_mesa_hash_table_insert(ctx->phis, instr, result);
   3523 }
   3524 
   3525 static void visit_post_phi(struct nir_to_llvm_context *ctx,
   3526                            nir_phi_instr *instr,
   3527                            LLVMValueRef llvm_phi)
   3528 {
   3529 	nir_foreach_phi_src(src, instr) {
   3530 		LLVMBasicBlockRef block = get_block(ctx, src->pred);
   3531 		LLVMValueRef llvm_src = get_src(ctx, src->src);
   3532 
   3533 		LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
   3534 	}
   3535 }
   3536 
   3537 static void phi_post_pass(struct nir_to_llvm_context *ctx)
   3538 {
   3539 	struct hash_entry *entry;
   3540 	hash_table_foreach(ctx->phis, entry) {
   3541 		visit_post_phi(ctx, (nir_phi_instr*)entry->key,
   3542 		               (LLVMValueRef)entry->data);
   3543 	}
   3544 }
   3545 
   3546 
   3547 static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
   3548 			    nir_ssa_undef_instr *instr)
   3549 {
   3550 	unsigned num_components = instr->def.num_components;
   3551 	LLVMValueRef undef;
   3552 
   3553 	if (num_components == 1)
   3554 		undef = LLVMGetUndef(ctx->i32);
   3555 	else {
   3556 		undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
   3557 	}
   3558 	_mesa_hash_table_insert(ctx->defs, &instr->def, undef);
   3559 }
   3560 
   3561 static void visit_jump(struct nir_to_llvm_context *ctx,
   3562 		       nir_jump_instr *instr)
   3563 {
   3564 	switch (instr->type) {
   3565 	case nir_jump_break:
   3566 		LLVMBuildBr(ctx->builder, ctx->break_block);
   3567 		LLVMClearInsertionPosition(ctx->builder);
   3568 		break;
   3569 	case nir_jump_continue:
   3570 		LLVMBuildBr(ctx->builder, ctx->continue_block);
   3571 		LLVMClearInsertionPosition(ctx->builder);
   3572 		break;
   3573 	default:
   3574 		fprintf(stderr, "Unknown NIR jump instr: ");
   3575 		nir_print_instr(&instr->instr, stderr);
   3576 		fprintf(stderr, "\n");
   3577 		abort();
   3578 	}
   3579 }
   3580 
   3581 static void visit_cf_list(struct nir_to_llvm_context *ctx,
   3582                           struct exec_list *list);
   3583 
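        /* Emit all instructions of a NIR block and remember the LLVM basic block
         * it started in, so phi nodes can later resolve their predecessors. */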
   3584 static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
   3585 {
   3586 	LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
   3587 	nir_foreach_instr(instr, block)
   3588 	{
   3589 		switch (instr->type) {
   3590 		case nir_instr_type_alu:
   3591 			visit_alu(ctx, nir_instr_as_alu(instr));
   3592 			break;
   3593 		case nir_instr_type_load_const:
   3594 			visit_load_const(ctx, nir_instr_as_load_const(instr));
   3595 			break;
   3596 		case nir_instr_type_intrinsic:
   3597 			visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
   3598 			break;
   3599 		case nir_instr_type_tex:
   3600 			visit_tex(ctx, nir_instr_as_tex(instr));
   3601 			break;
   3602 		case nir_instr_type_phi:
   3603 			visit_phi(ctx, nir_instr_as_phi(instr));
   3604 			break;
   3605 		case nir_instr_type_ssa_undef:
   3606 			visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
   3607 			break;
   3608 		case nir_instr_type_jump:
   3609 			visit_jump(ctx, nir_instr_as_jump(instr));
   3610 			break;
   3611 		default:
   3612 			fprintf(stderr, "Unknown NIR instr type: ");
   3613 			nir_print_instr(instr, stderr);
   3614 			fprintf(stderr, "\n");
   3615 			abort();
   3616 		}
   3617 	}
   3618 
   3619 	_mesa_hash_table_insert(ctx->defs, block, llvm_block);
   3620 }
   3621 
   3622 static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
   3623 {
   3624 	LLVMValueRef value = get_src(ctx, if_stmt->condition);
   3625 
   3626 	LLVMBasicBlockRef merge_block =
   3627 	    LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
   3628 	LLVMBasicBlockRef if_block =
   3629 	    LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
   3630 	LLVMBasicBlockRef else_block = merge_block;
   3631 	if (!exec_list_is_empty(&if_stmt->else_list))
   3632 		else_block = LLVMAppendBasicBlockInContext(
   3633 		    ctx->context, ctx->main_function, "");
   3634 
   3635 	LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
   3636 	                                  LLVMConstInt(ctx->i32, 0, false), "");
   3637 	LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
   3638 
   3639 	LLVMPositionBuilderAtEnd(ctx->builder, if_block);
   3640 	visit_cf_list(ctx, &if_stmt->then_list);
   3641 	if (LLVMGetInsertBlock(ctx->builder))
   3642 		LLVMBuildBr(ctx->builder, merge_block);
   3643 
   3644 	if (!exec_list_is_empty(&if_stmt->else_list)) {
   3645 		LLVMPositionBuilderAtEnd(ctx->builder, else_block);
   3646 		visit_cf_list(ctx, &if_stmt->else_list);
   3647 		if (LLVMGetInsertBlock(ctx->builder))
   3648 			LLVMBuildBr(ctx->builder, merge_block);
   3649 	}
   3650 
   3651 	LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
   3652 }
   3653 
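        /* Loops become a branch into a fresh continue block that re-enters itself
         * at the end of the body; breaks jump to a separate break block. The parent
         * loop's blocks are saved and restored to support nesting. */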
   3654 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
   3655 {
   3656 	LLVMBasicBlockRef continue_parent = ctx->continue_block;
   3657 	LLVMBasicBlockRef break_parent = ctx->break_block;
   3658 
   3659 	ctx->continue_block =
   3660 	    LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
   3661 	ctx->break_block =
   3662 	    LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
   3663 
   3664 	LLVMBuildBr(ctx->builder, ctx->continue_block);
   3665 	LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
   3666 	visit_cf_list(ctx, &loop->body);
   3667 
   3668 	if (LLVMGetInsertBlock(ctx->builder))
   3669 		LLVMBuildBr(ctx->builder, ctx->continue_block);
   3670 	LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
   3671 
   3672 	ctx->continue_block = continue_parent;
   3673 	ctx->break_block = break_parent;
   3674 }
   3675 
   3676 static void visit_cf_list(struct nir_to_llvm_context *ctx,
   3677                           struct exec_list *list)
   3678 {
   3679 	foreach_list_typed(nir_cf_node, node, node, list)
   3680 	{
   3681 		switch (node->type) {
   3682 		case nir_cf_node_block:
   3683 			visit_block(ctx, nir_cf_node_as_block(node));
   3684 			break;
   3685 
   3686 		case nir_cf_node_if:
   3687 			visit_if(ctx, nir_cf_node_as_if(node));
   3688 			break;
   3689 
   3690 		case nir_cf_node_loop:
   3691 			visit_loop(ctx, nir_cf_node_as_loop(node));
   3692 			break;
   3693 
   3694 		default:
   3695 			assert(0);
   3696 		}
   3697 	}
   3698 }
   3699 
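        /* Vertex inputs are fetched with llvm.SI.vs.load.input from the vertex
         * buffer descriptors; the buffer index is instance_id + start_instance for
         * instanced attributes and vertex_id + base_vertex otherwise, and each
         * fetched vec4 is scattered into the per-channel inputs[] array. */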
   3700 static void
   3701 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
   3702 		     struct nir_variable *variable)
   3703 {
   3704 	LLVMValueRef t_list_ptr = ctx->vertex_buffers;
   3705 	LLVMValueRef t_offset;
   3706 	LLVMValueRef t_list;
   3707 	LLVMValueRef args[3];
   3708 	LLVMValueRef input;
   3709 	LLVMValueRef buffer_index;
   3710 	int index = variable->data.location - VERT_ATTRIB_GENERIC0;
   3711 	int idx = variable->data.location;
   3712 	unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
   3713 
   3714 	variable->data.driver_location = idx * 4;
   3715 
   3716 	if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
   3717 		buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
   3718 					    ctx->start_instance, "");
   3719 		ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
   3720 		                            ctx->shader_info->vs.vgpr_comp_cnt);
   3721 	} else
   3722 		buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
   3723 					    ctx->base_vertex, "");
   3724 
   3725 	for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
   3726 		t_offset = LLVMConstInt(ctx->i32, index + i, false);
   3727 
   3728 		t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
   3729 		args[0] = t_list;
   3730 		args[1] = LLVMConstInt(ctx->i32, 0, false);
   3731 		args[2] = buffer_index;
   3732 		input = ac_emit_llvm_intrinsic(&ctx->ac,
   3733 			"llvm.SI.vs.load.input", ctx->v4f32, args, 3,
   3734 			AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
   3735 
   3736 		for (unsigned chan = 0; chan < 4; chan++) {
   3737 			LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
   3738 			ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
   3739 				to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
   3740 							input, llvm_chan, ""));
   3741 		}
   3742 	}
   3743 }
   3744 
   3745 
   3746 static void interp_fs_input(struct nir_to_llvm_context *ctx,
   3747 			    unsigned attr,
   3748 			    LLVMValueRef interp_param,
   3749 			    LLVMValueRef prim_mask,
   3750 			    LLVMValueRef result[4])
   3751 {
   3752 	const char *intr_name;
   3753 	LLVMValueRef attr_number;
   3754 	unsigned chan;
   3755 
   3756 	attr_number = LLVMConstInt(ctx->i32, attr, false);
   3757 
   3758 	/* fs.constant returns the param from the middle vertex, so it's not
   3759 	 * really useful for flat shading. It's meant to be used for custom
   3760 	 * interpolation (but the intrinsic can't fetch from the other two
   3761 	 * vertices).
   3762 	 *
   3763 	 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
   3764 	 * to do the right thing. The only reason we use fs.constant is that
   3765 	 * fs.interp cannot be used on integers, because they can be equal
   3766 	 * to NaN.
   3767 	 */
   3768 	intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
   3769 
   3770 	for (chan = 0; chan < 4; chan++) {
   3771 		LLVMValueRef args[4];
   3772 		LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
   3773 
   3774 		args[0] = llvm_chan;
   3775 		args[1] = attr_number;
   3776 		args[2] = prim_mask;
   3777 		args[3] = interp_param;
   3778 		result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
   3779 						   ctx->f32, args, args[3] ? 4 : 3,
   3780 						  AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
   3781 	}
   3782 }
   3783 
   3784 static void
   3785 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
   3786 		     struct nir_variable *variable)
   3787 {
   3788 	int idx = variable->data.location;
   3789 	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
   3790 	LLVMValueRef interp;
   3791 
   3792 	variable->data.driver_location = idx * 4;
   3793 	ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
   3794 
   3795 	if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
   3796 		unsigned interp_type;
   3797 		if (variable->data.sample) {
   3798 			interp_type = INTERP_SAMPLE;
   3799 			ctx->shader_info->fs.force_persample = true;
   3800 		} else if (variable->data.centroid)
   3801 			interp_type = INTERP_CENTROID;
   3802 		else
   3803 			interp_type = INTERP_CENTER;
   3804 
   3805 		interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
   3806 	} else
   3807 		interp = NULL;
   3808 
   3809 	for (unsigned i = 0; i < attrib_count; ++i)
   3810 		ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
   3811 
   3812 }
   3813 
   3814 static void
   3815 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
   3816 			 struct nir_variable *variable)
   3817 {
   3818 	switch (ctx->stage) {
   3819 	case MESA_SHADER_VERTEX:
   3820 		handle_vs_input_decl(ctx, variable);
   3821 		break;
   3822 	case MESA_SHADER_FRAGMENT:
   3823 		handle_fs_input_decl(ctx, variable);
   3824 		break;
   3825 	default:
   3826 		break;
   3827 	}
   3828 
   3829 }
   3830 
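        /* Interpolate every declared FS input, assigning packed parameter indices
         * and recording flat-shaded inputs; gl_FragCoord is taken directly from
         * the fragment position VGPRs, with 1/w in the fourth channel. */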
   3831 static void
   3832 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
   3833 		     struct nir_shader *nir)
   3834 {
   3835 	unsigned index = 0;
   3836 	for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
   3837 		LLVMValueRef interp_param;
   3838 		LLVMValueRef *inputs = ctx->inputs + radeon_llvm_reg_index_soa(i, 0);
   3839 
   3840 		if (!(ctx->input_mask & (1ull << i)))
   3841 			continue;
   3842 
   3843 		if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC) {
   3844 			interp_param = *inputs;
   3845 			interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
   3846 					inputs);
   3847 
   3848 			if (!interp_param)
   3849 				ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
   3850 			++index;
   3851 		} else if (i == VARYING_SLOT_POS) {
   3852 			for (int j = 0; j < 3; ++j)
   3853 				inputs[j] = ctx->frag_pos[j];
   3854 
   3855 			inputs[3] = ac_emit_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]);
   3856 		}
   3857 	}
   3858 	ctx->shader_info->fs.num_interp = index;
   3859 	if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
   3860 		ctx->shader_info->fs.has_pcoord = true;
   3861 	ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
   3862 }
   3863 
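        /* Build an alloca in the function entry block, where LLVM's mem2reg pass
         * can promote it, and zero-initialize it at the current insertion point. */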
   3864 static LLVMValueRef
   3865 ac_build_alloca(struct nir_to_llvm_context *ctx,
   3866                 LLVMTypeRef type,
   3867                 const char *name)
   3868 {
   3869 	LLVMBuilderRef builder = ctx->builder;
   3870 	LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
   3871 	LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
   3872 	LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
   3873 	LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
   3874 	LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
   3875 	LLVMValueRef res;
   3876 
   3877 	if (first_instr) {
   3878 		LLVMPositionBuilderBefore(first_builder, first_instr);
   3879 	} else {
   3880 		LLVMPositionBuilderAtEnd(first_builder, first_block);
   3881 	}
   3882 
   3883 	res = LLVMBuildAlloca(first_builder, type, name);
   3884 	LLVMBuildStore(builder, LLVMConstNull(type), res);
   3885 
   3886 	LLVMDisposeBuilder(first_builder);
   3887 
   3888 	return res;
   3889 }
   3890 
   3891 static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
   3892 					  LLVMTypeRef type,
   3893 					  const char *name)
   3894 {
   3895 	LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
   3896 	LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
   3897 	return ptr;
   3898 }
   3899 
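        /* Allocate one f32 alloca per output component; VS clip/cull distance
         * arrays are packed into at most two vec4 slots and their masks recorded. */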
   3900 static void
   3901 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
   3902 			  struct nir_variable *variable)
   3903 {
   3904 	int idx = variable->data.location + variable->data.index;
   3905 	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
   3906 
   3907 	variable->data.driver_location = idx * 4;
   3908 
   3909 	if (ctx->stage == MESA_SHADER_VERTEX) {
   3910 
   3911 		if (idx == VARYING_SLOT_CLIP_DIST0 ||
   3912 		    idx == VARYING_SLOT_CULL_DIST0) {
   3913 			int length = glsl_get_length(variable->type);
   3914 			if (idx == VARYING_SLOT_CLIP_DIST0) {
   3915 				ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
   3916 				ctx->num_clips = length;
   3917 			} else if (idx == VARYING_SLOT_CULL_DIST0) {
   3918 				ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
   3919 				ctx->num_culls = length;
   3920 			}
   3921 			if (length > 4)
   3922 				attrib_count = 2;
   3923 			else
   3924 				attrib_count = 1;
   3925 		}
   3926 	}
   3927 
   3928 	for (unsigned i = 0; i < attrib_count; ++i) {
   3929 		for (unsigned chan = 0; chan < 4; chan++) {
   3930 			ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
   3931 		                       si_build_alloca_undef(ctx, ctx->f32, "");
   3932 		}
   3933 	}
   3934 	ctx->output_mask |= ((1ull << attrib_count) - 1) << idx;
   3935 }
   3936 
   3937 static void
   3938 setup_locals(struct nir_to_llvm_context *ctx,
   3939 	     struct nir_function *func)
   3940 {
   3941 	int i, j;
   3942 	ctx->num_locals = 0;
   3943 	nir_foreach_variable(variable, &func->impl->locals) {
   3944 		unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
   3945 		variable->data.driver_location = ctx->num_locals * 4;
   3946 		ctx->num_locals += attrib_count;
   3947 	}
   3948 	ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
   3949 	if (!ctx->locals)
   3950 	    return;
   3951 
   3952 	for (i = 0; i < ctx->num_locals; i++) {
   3953 		for (j = 0; j < 4; j++) {
   3954 			ctx->locals[i * 4 + j] =
   3955 				si_build_alloca_undef(ctx, ctx->f32, "temp");
   3956 		}
   3957 	}
   3958 }
   3959 
   3960 static LLVMValueRef
   3961 emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
   3962 {
   3963 	v = to_float(ctx, v);
   3964 	v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", v, LLVMConstReal(ctx->f32, lo));
   3965 	return emit_intrin_2f_param(ctx, "llvm.minnum.f32", v, LLVMConstReal(ctx->f32, hi));
   3966 }
   3967 
   3968 
   3969 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
   3970 					LLVMValueRef src0, LLVMValueRef src1)
   3971 {
   3972 	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
   3973 	LLVMValueRef comp[2];
   3974 
   3975 	comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx->i32, 65535, 0), "");
   3976 	comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx->i32, 65535, 0), "");
   3977 	comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
   3978 	return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
   3979 }
   3980 
   3981 /* Initialize arguments for the shader export intrinsic */
   3982 static void
   3983 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
   3984 			 LLVMValueRef *values,
   3985 			 unsigned target,
   3986 			 LLVMValueRef *args)
   3987 {
   3988 	/* Default is 0xf. Adjusted below depending on the format. */
   3989 	args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false);
   3990 	/* Specify whether the EXEC mask represents the valid mask */
   3991 	args[1] = LLVMConstInt(ctx->i32, 0, false);
   3992 
   3993 	/* Specify whether this is the last export */
   3994 	args[2] = LLVMConstInt(ctx->i32, 0, false);
   3995 	/* Specify the target we are exporting */
   3996 	args[3] = LLVMConstInt(ctx->i32, target, false);
   3997 
   3998 	args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */
   3999 	args[5] = LLVMGetUndef(ctx->f32);
   4000 	args[6] = LLVMGetUndef(ctx->f32);
   4001 	args[7] = LLVMGetUndef(ctx->f32);
   4002 	args[8] = LLVMGetUndef(ctx->f32);
   4003 
   4004 	if (!values)
   4005 		return;
   4006 
   4007 	if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
   4008 		LLVMValueRef val[4];
   4009 		unsigned index = target - V_008DFC_SQ_EXP_MRT;
   4010 		unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
   4011 		bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
   4012 
   4013 		switch(col_format) {
   4014 		case V_028714_SPI_SHADER_ZERO:
   4015 			args[0] = LLVMConstInt(ctx->i32, 0x0, 0);
   4016 			args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0);
   4017 			break;
   4018 
   4019 		case V_028714_SPI_SHADER_32_R:
   4020 			args[0] = LLVMConstInt(ctx->i32, 0x1, 0);
   4021 			args[5] = values[0];
   4022 			break;
   4023 
   4024 		case V_028714_SPI_SHADER_32_GR:
   4025 			args[0] = LLVMConstInt(ctx->i32, 0x3, 0);
   4026 			args[5] = values[0];
   4027 			args[6] = values[1];
   4028 			break;
   4029 
   4030 		case V_028714_SPI_SHADER_32_AR:
   4031 			args[0] = LLVMConstInt(ctx->i32, 0x9, 0);
   4032 			args[5] = values[0];
   4033 			args[8] = values[3];
   4034 			break;
   4035 
   4036 		case V_028714_SPI_SHADER_FP16_ABGR:
   4037 			args[4] = ctx->i32one;
   4038 
   4039 			for (unsigned chan = 0; chan < 2; chan++) {
   4040 				LLVMValueRef pack_args[2] = {
   4041 					values[2 * chan],
   4042 					values[2 * chan + 1]
   4043 				};
   4044 				LLVMValueRef packed;
   4045 
   4046 				packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
   4047 							     ctx->i32, pack_args, 2,
   4048 							     AC_FUNC_ATTR_READNONE);
   4049 				args[chan + 5] = packed;
   4050 			}
   4051 			break;
   4052 
   4053 		case V_028714_SPI_SHADER_UNORM16_ABGR:
   4054 			for (unsigned chan = 0; chan < 4; chan++) {
   4055 				val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
   4056 				val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
   4057 							LLVMConstReal(ctx->f32, 65535), "");
   4058 				val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
   4059 							LLVMConstReal(ctx->f32, 0.5), "");
   4060 				val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
   4061 							ctx->i32, "");
   4062 			}
   4063 
   4064 			args[4] = ctx->i32one;
   4065 			args[5] = emit_pack_int16(ctx, val[0], val[1]);
   4066 			args[6] = emit_pack_int16(ctx, val[2], val[3]);
   4067 			break;
   4068 
   4069 		case V_028714_SPI_SHADER_SNORM16_ABGR:
   4070 			for (unsigned chan = 0; chan < 4; chan++) {
   4071 				val[chan] = emit_float_saturate(ctx, values[chan], -1, 1);
   4072 				val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
   4073 							LLVMConstReal(ctx->f32, 32767), "");
   4074 
   4075 				/* If positive, add 0.5, else add -0.5. */
   4076 				val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
   4077 						LLVMBuildSelect(ctx->builder,
   4078 							LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
   4079 								val[chan], ctx->f32zero, ""),
   4080 							LLVMConstReal(ctx->f32, 0.5),
   4081 							LLVMConstReal(ctx->f32, -0.5), ""), "");
   4082 				val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
   4083 			}
   4084 
   4085 			args[4] = ctx->i32one;
   4086 			args[5] = emit_pack_int16(ctx, val[0], val[1]);
   4087 			args[6] = emit_pack_int16(ctx, val[2], val[3]);
   4088 			break;
   4089 
   4090 		case V_028714_SPI_SHADER_UINT16_ABGR: {
   4091 			LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
   4092 
   4093 			for (unsigned chan = 0; chan < 4; chan++) {
   4094 				val[chan] = to_integer(ctx, values[chan]);
   4095 				val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max);
   4096 			}
   4097 
   4098 			args[4] = ctx->i32one;
   4099 			args[5] = emit_pack_int16(ctx, val[0], val[1]);
   4100 			args[6] = emit_pack_int16(ctx, val[2], val[3]);
   4101 			break;
   4102 		}
   4103 
   4104 		case V_028714_SPI_SHADER_SINT16_ABGR: {
   4105 			LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
   4106 			LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
   4107 
   4108 			/* Clamp. */
   4109 			for (unsigned chan = 0; chan < 4; chan++) {
   4110 				val[chan] = to_integer(ctx, values[chan]);
   4111 				val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max);
   4112 				val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min);
   4113 			}
   4114 
   4115 			args[4] = ctx->i32one;
   4116 			args[5] = emit_pack_int16(ctx, val[0], val[1]);
   4117 			args[6] = emit_pack_int16(ctx, val[2], val[3]);
   4118 			break;
   4119 		}
   4120 
   4121 		default:
   4122 		case V_028714_SPI_SHADER_32_ABGR:
   4123 			memcpy(&args[5], values, sizeof(values[0]) * 4);
   4124 			break;
   4125 		}
   4126 	} else
   4127 		memcpy(&args[5], values, sizeof(values[0]) * 4);
   4128 
   4129 	for (unsigned i = 5; i < 9; ++i)
   4130 		args[i] = to_float(ctx, args[i]);
   4131 }
   4132 
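        /* Export all VS outputs: clip/cull distances go to POS+2 and POS+3,
         * position/point size/layer/viewport index to the position exports, user
         * varyings to PARAM exports. A default position export is added if the
         * shader wrote none, and the last position export is flagged as done. */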
   4133 static void
   4134 handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
   4135 {
   4136 	uint32_t param_count = 0;
   4137 	unsigned target;
   4138 	unsigned pos_idx, num_pos_exports = 0;
   4139 	LLVMValueRef args[9];
   4140 	LLVMValueRef pos_args[4][9] = { { 0 } };
   4141 	LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
   4142 	int i;
   4143 	const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) |
   4144 						       (1ull << VARYING_SLOT_CLIP_DIST1) |
   4145 						       (1ull << VARYING_SLOT_CULL_DIST0) |
   4146 						       (1ull << VARYING_SLOT_CULL_DIST1));
   4147 
   4148 	if (clip_mask) {
   4149 		LLVMValueRef slots[8];
   4150 		unsigned j;
   4151 
   4152 		if (ctx->shader_info->vs.cull_dist_mask)
   4153 			ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips;
   4154 
   4155 		i = VARYING_SLOT_CLIP_DIST0;
   4156 		for (j = 0; j < ctx->num_clips; j++)
   4157 			slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
   4158 							       ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
   4159 		i = VARYING_SLOT_CULL_DIST0;
   4160 		for (j = 0; j < ctx->num_culls; j++)
   4161 			slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
   4162 									   ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
   4163 
   4164 		for (i = ctx->num_clips + ctx->num_culls; i < 8; i++)
   4165 			slots[i] = LLVMGetUndef(ctx->f32);
   4166 
   4167 		if (ctx->num_clips + ctx->num_culls > 4) {
   4168 			target = V_008DFC_SQ_EXP_POS + 3;
   4169 			si_llvm_init_export_args(ctx, &slots[4], target, args);
   4170 			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
   4171 			       args, sizeof(args));
   4172 		}
   4173 
   4174 		target = V_008DFC_SQ_EXP_POS + 2;
   4175 		si_llvm_init_export_args(ctx, &slots[0], target, args);
   4176 		memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
   4177 		       args, sizeof(args));
   4178 
   4179 	}
   4180 
   4181 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
   4182 		LLVMValueRef values[4];
   4183 		if (!(ctx->output_mask & (1ull << i)))
   4184 			continue;
   4185 
   4186 		for (unsigned j = 0; j < 4; j++)
   4187 			values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
   4188 					      ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
   4189 
   4190 		if (i == VARYING_SLOT_POS) {
   4191 			target = V_008DFC_SQ_EXP_POS;
   4192 		} else if (i == VARYING_SLOT_CLIP_DIST0 ||
   4193 			   i == VARYING_SLOT_CLIP_DIST1 ||
   4194 			   i == VARYING_SLOT_CULL_DIST0 ||
   4195 			   i == VARYING_SLOT_CULL_DIST1) {
   4196 			continue;
   4197 		} else if (i == VARYING_SLOT_PSIZ) {
   4198 			ctx->shader_info->vs.writes_pointsize = true;
   4199 			psize_value = values[0];
   4200 			continue;
   4201 		} else if (i == VARYING_SLOT_LAYER) {
   4202 			ctx->shader_info->vs.writes_layer = true;
   4203 			layer_value = values[0];
   4204 			continue;
   4205 		} else if (i == VARYING_SLOT_VIEWPORT) {
   4206 			ctx->shader_info->vs.writes_viewport_index = true;
   4207 			viewport_index_value = values[0];
   4208 			continue;
   4209 		} else if (i >= VARYING_SLOT_VAR0) {
   4210 			ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
   4211 			target = V_008DFC_SQ_EXP_PARAM + param_count;
   4212 			param_count++;
   4213 		}
   4214 
   4215 		si_llvm_init_export_args(ctx, values, target, args);
   4216 
   4217 		if (target >= V_008DFC_SQ_EXP_POS &&
   4218 		    target <= (V_008DFC_SQ_EXP_POS + 3)) {
   4219 			memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
   4220 			       args, sizeof(args));
   4221 		} else {
   4222 			ac_emit_llvm_intrinsic(&ctx->ac,
   4223 					       "llvm.SI.export",
   4224 					       ctx->voidt,
   4225 					       args, 9, 0);
   4226 		}
   4227 	}
   4228 
   4229 	/* We need to add the position output manually if it's missing. */
   4230 	if (!pos_args[0][0]) {
   4231 		pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
   4232 		pos_args[0][1] = ctx->i32zero; /* EXEC mask */
   4233 		pos_args[0][2] = ctx->i32zero; /* last export? */
   4234 		pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
   4235 		pos_args[0][4] = ctx->i32zero; /* COMPR flag */
   4236 		pos_args[0][5] = ctx->f32zero; /* X */
   4237 		pos_args[0][6] = ctx->f32zero; /* Y */
   4238 		pos_args[0][7] = ctx->f32zero; /* Z */
   4239 		pos_args[0][8] = ctx->f32one;  /* W */
   4240 	}
   4241 
   4242 	uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 0) |
   4243 			 (ctx->shader_info->vs.writes_layer == true ? 4 : 0) |
   4244 			 (ctx->shader_info->vs.writes_viewport_index == true ? 8 : 0));
   4245 	if (mask) {
   4246 		pos_args[1][0] = LLVMConstInt(ctx->i32, mask, false); /* writemask */
   4247 		pos_args[1][1] = ctx->i32zero;  /* EXEC mask */
   4248 		pos_args[1][2] = ctx->i32zero;  /* last export? */
   4249 		pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false);
   4250 		pos_args[1][4] = ctx->i32zero;  /* COMPR flag */
   4251 		pos_args[1][5] = ctx->f32zero; /* X */
   4252 		pos_args[1][6] = ctx->f32zero; /* Y */
   4253 		pos_args[1][7] = ctx->f32zero; /* Z */
   4254 		pos_args[1][8] = ctx->f32zero;  /* W */
   4255 
   4256 		if (ctx->shader_info->vs.writes_pointsize == true)
   4257 			pos_args[1][5] = psize_value;
   4258 		if (ctx->shader_info->vs.writes_layer == true)
   4259 			pos_args[1][7] = layer_value;
   4260 		if (ctx->shader_info->vs.writes_viewport_index == true)
   4261 			pos_args[1][8] = viewport_index_value;
   4262 	}
   4263 	for (i = 0; i < 4; i++) {
   4264 		if (pos_args[i][0])
   4265 			num_pos_exports++;
   4266 	}
   4267 
   4268 	pos_idx = 0;
   4269 	for (i = 0; i < 4; i++) {
   4270 		if (!pos_args[i][0])
   4271 			continue;
   4272 
   4273 		/* Specify the target we are exporting */
   4274 		pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
   4275 		if (pos_idx == num_pos_exports)
   4276 			pos_args[i][2] = ctx->i32one;
   4277 		ac_emit_llvm_intrinsic(&ctx->ac,
   4278 				       "llvm.SI.export",
   4279 				       ctx->voidt,
   4280 				       pos_args[i], 9, 0);
   4281 	}
   4282 
   4283 	ctx->shader_info->vs.pos_exports = num_pos_exports;
   4284 	ctx->shader_info->vs.param_exports = param_count;
   4285 }
   4286 
   4287 static void
   4288 si_export_mrt_color(struct nir_to_llvm_context *ctx,
   4289 		    LLVMValueRef *color, unsigned param, bool is_last)
   4290 {
   4291 	LLVMValueRef args[9];
   4292 	/* Export */
   4293 	si_llvm_init_export_args(ctx, color, param,
   4294 				 args);
   4295 
   4296 	if (is_last) {
   4297 		args[1] = ctx->i32one; /* whether the EXEC mask is valid */
   4298 		args[2] = ctx->i32one; /* DONE bit */
   4299 	} else if (args[0] == ctx->i32zero)
   4300 		return; /* unnecessary NULL export */
   4301 
   4302 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
   4303 			    ctx->voidt, args, 9, 0);
   4304 }
   4305 
   4306 static void
   4307 si_export_mrt_z(struct nir_to_llvm_context *ctx,
   4308 		LLVMValueRef depth, LLVMValueRef stencil,
   4309 		LLVMValueRef samplemask)
   4310 {
   4311 	LLVMValueRef args[9];
   4312 	unsigned mask = 0;
   4313 	args[1] = ctx->i32one; /* whether the EXEC mask is valid */
   4314 	args[2] = ctx->i32one; /* DONE bit */
   4315 	/* Specify the target we are exporting */
   4316 	args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false);
   4317 
    4318 	args[4] = ctx->i32zero; /* COMPR flag */
   4319 	args[5] = LLVMGetUndef(ctx->f32); /* R, depth */
   4320 	args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
   4321 	args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */
   4322 	args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
   4323 
   4324 	if (depth) {
   4325 		args[5] = depth;
   4326 		mask |= 0x1;
   4327 	}
   4328 
   4329 	if (stencil) {
   4330 		args[6] = stencil;
   4331 		mask |= 0x2;
   4332 	}
   4333 
   4334 	if (samplemask) {
   4335 		args[7] = samplemask;
   4336 		mask |= 0x04;
   4337 	}
   4338 
    4339 	/* SI (except OLAND) has a hardware bug where only the X writemask
    4340 	 * component is honored, so force the X channel on for MRTZ exports. */
   4341 	if (ctx->options->chip_class == SI &&
   4342 	    ctx->options->family != CHIP_OLAND)
   4343 		mask |= 0x01;
   4344 
   4345 	args[0] = LLVMConstInt(ctx->i32, mask, false);
   4346 	ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
   4347 			    ctx->voidt, args, 9, 0);
   4348 }
   4349 
   4350 static void
   4351 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
   4352 {
   4353 	unsigned index = 0;
   4354 	LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
   4355 
   4356 	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
   4357 		LLVMValueRef values[4];
   4358 
   4359 		if (!(ctx->output_mask & (1ull << i)))
   4360 			continue;
   4361 
   4362 		if (i == FRAG_RESULT_DEPTH) {
   4363 			ctx->shader_info->fs.writes_z = true;
   4364 			depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
   4365 							    ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
   4366 		} else if (i == FRAG_RESULT_STENCIL) {
   4367 			ctx->shader_info->fs.writes_stencil = true;
   4368 			stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
   4369 							      ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
   4370 		} else {
   4371 			bool last = false;
   4372 			for (unsigned j = 0; j < 4; j++)
   4373 				values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
   4374 									ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
   4375 
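			/* This is the last color export only if no depth/stencil export
			 * follows and no higher-numbered outputs remain in output_mask. */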
   4376 			if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
   4377 				last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
   4378 
   4379 			si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
   4380 			index++;
   4381 		}
   4382 	}
   4383 
   4384 	if (depth || stencil)
   4385 		si_export_mrt_z(ctx, depth, stencil, samplemask);
   4386 	else if (!index)
   4387 		si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
   4388 
   4389 	ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
   4390 }
   4391 
   4392 static void
   4393 handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
   4394 {
   4395 	switch (ctx->stage) {
   4396 	case MESA_SHADER_VERTEX:
   4397 		handle_vs_outputs_post(ctx);
   4398 		break;
   4399 	case MESA_SHADER_FRAGMENT:
   4400 		handle_fs_outputs_post(ctx);
   4401 		break;
   4402 	default:
   4403 		break;
   4404 	}
   4405 }
   4406 
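/* Assign each compute shared variable a consecutive driver_location, counted
 * in vec4 attribute slots; the caller converts the total into a byte-sized
 * LDS allocation. */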
   4407 static void
   4408 handle_shared_compute_var(struct nir_to_llvm_context *ctx,
   4409 			  struct nir_variable *variable, uint32_t *offset, int idx)
   4410 {
   4411 	unsigned size = glsl_count_attribute_slots(variable->type, false);
   4412 	variable->data.driver_location = *offset;
   4413 	*offset += size;
   4414 }
   4415 
   4416 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
   4417 {
   4418 	LLVMPassManagerRef passmgr;
   4419 	/* Create the pass manager */
   4420 	passmgr = LLVMCreateFunctionPassManagerForModule(
   4421 							ctx->module);
   4422 
    4423 	/* mem2reg: promote allocas to SSA values, eliminating their loads and stores. */
   4424 	LLVMAddPromoteMemoryToRegisterPass(passmgr);
   4425 
   4426 	/* Add some optimization passes */
   4427 	LLVMAddScalarReplAggregatesPass(passmgr);
   4428 	LLVMAddLICMPass(passmgr);
   4429 	LLVMAddAggressiveDCEPass(passmgr);
   4430 	LLVMAddCFGSimplificationPass(passmgr);
   4431 	LLVMAddInstructionCombiningPass(passmgr);
   4432 
    4433 	/* Run the passes over the shader's main function. */
   4434 	LLVMInitializeFunctionPassManager(passmgr);
   4435 	LLVMRunFunctionPassManager(passmgr, ctx->main_function);
   4436 	LLVMFinalizeFunctionPassManager(passmgr);
   4437 
   4438 	LLVMDisposeBuilder(ctx->builder);
   4439 	LLVMDisposePassManager(passmgr);
   4440 }
   4441 
   4442 static
   4443 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
   4444                                        struct nir_shader *nir,
   4445                                        struct ac_shader_variant_info *shader_info,
   4446                                        const struct ac_nir_compiler_options *options)
   4447 {
   4448 	struct nir_to_llvm_context ctx = {0};
   4449 	struct nir_function *func;
   4450 	unsigned i;
   4451 	ctx.options = options;
   4452 	ctx.shader_info = shader_info;
   4453 	ctx.context = LLVMContextCreate();
   4454 	ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
   4455 
   4456 	ac_llvm_context_init(&ctx.ac, ctx.context);
   4457 	ctx.ac.module = ctx.module;
   4458 
   4459 	ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
   4460 
   4461 	memset(shader_info, 0, sizeof(*shader_info));
   4462 
   4463 	LLVMSetTarget(ctx.module, "amdgcn--");
   4464 
   4465 	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
   4466 	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
   4467 	LLVMSetDataLayout(ctx.module, data_layout_str);
   4468 	LLVMDisposeTargetData(data_layout);
   4469 	LLVMDisposeMessage(data_layout_str);
   4470 
   4471 	setup_types(&ctx);
   4472 
   4473 	ctx.builder = LLVMCreateBuilderInContext(ctx.context);
   4474 	ctx.ac.builder = ctx.builder;
   4475 	ctx.stage = nir->stage;
   4476 
   4477 	for (i = 0; i < AC_UD_MAX_SETS; i++)
   4478 		shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
   4479 	for (i = 0; i < AC_UD_MAX_UD; i++)
   4480 		shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
   4481 
   4482 	create_function(&ctx);
   4483 
   4484 	if (nir->stage == MESA_SHADER_COMPUTE) {
   4485 		int num_shared = 0;
   4486 		nir_foreach_variable(variable, &nir->shared)
   4487 			num_shared++;
   4488 		if (num_shared) {
   4489 			int idx = 0;
   4490 			uint32_t shared_size = 0;
   4491 			LLVMValueRef var;
   4492 			LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
   4493 			nir_foreach_variable(variable, &nir->shared) {
   4494 				handle_shared_compute_var(&ctx, variable, &shared_size, idx);
   4495 				idx++;
   4496 			}
   4497 
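			/* driver_location offsets are counted in vec4 attribute slots;
			 * each slot is 16 bytes, so convert to bytes for the LDS array. */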
   4498 			shared_size *= 16;
   4499 			var = LLVMAddGlobalInAddressSpace(ctx.module,
   4500 							  LLVMArrayType(ctx.i8, shared_size),
   4501 							  "compute_lds",
   4502 							  LOCAL_ADDR_SPACE);
   4503 			LLVMSetAlignment(var, 4);
   4504 			ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
   4505 		}
   4506 	}
   4507 
   4508 	nir_foreach_variable(variable, &nir->inputs)
   4509 		handle_shader_input_decl(&ctx, variable);
   4510 
   4511 	if (nir->stage == MESA_SHADER_FRAGMENT)
   4512 		handle_fs_inputs_pre(&ctx, nir);
   4513 
   4514 	nir_foreach_variable(variable, &nir->outputs)
   4515 		handle_shader_output_decl(&ctx, variable);
   4516 
   4517 	ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
   4518 	                                   _mesa_key_pointer_equal);
   4519 	ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
   4520 	                                   _mesa_key_pointer_equal);
   4521 
   4522 	func = (struct nir_function *)exec_list_get_head(&nir->functions);
   4523 
   4524 	setup_locals(&ctx, func);
   4525 
   4526 	visit_cf_list(&ctx, &func->impl->body);
   4527 	phi_post_pass(&ctx);
   4528 
   4529 	handle_shader_outputs_post(&ctx);
   4530 	LLVMBuildRetVoid(ctx.builder);
   4531 
   4532 	ac_llvm_finalize_module(&ctx);
   4533 	free(ctx.locals);
   4534 	ralloc_free(ctx.defs);
   4535 	ralloc_free(ctx.phis);
   4536 
   4537 	return ctx.module;
   4538 }
   4539 
   4540 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
   4541 {
   4542 	unsigned *retval = (unsigned *)context;
   4543 	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
   4544 	char *description = LLVMGetDiagInfoDescription(di);
   4545 
   4546 	if (severity == LLVMDSError) {
   4547 		*retval = 1;
   4548 		fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
   4549 		        description);
   4550 	}
   4551 
   4552 	LLVMDisposeMessage(description);
   4553 }
   4554 
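/* Run the LLVM backend: emit an ELF object for the module into a memory
 * buffer and unpack its contents into the ac_shader_binary. */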
   4555 static unsigned ac_llvm_compile(LLVMModuleRef M,
   4556                                 struct ac_shader_binary *binary,
   4557                                 LLVMTargetMachineRef tm)
   4558 {
   4559 	unsigned retval = 0;
   4560 	char *err;
   4561 	LLVMContextRef llvm_ctx;
   4562 	LLVMMemoryBufferRef out_buffer;
   4563 	unsigned buffer_size;
   4564 	const char *buffer_data;
   4565 	LLVMBool mem_err;
   4566 
    4567 	/* Set up the diagnostic handler. */
   4568 	llvm_ctx = LLVMGetModuleContext(M);
   4569 
   4570 	LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
   4571 	                                &retval);
   4572 
    4573 	/* Compile the IR to an object file in memory. */
   4574 	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
   4575 	                                              &err, &out_buffer);
   4576 
   4577 	/* Process Errors/Warnings */
   4578 	if (mem_err) {
   4579 		fprintf(stderr, "%s: %s", __FUNCTION__, err);
   4580 		free(err);
   4581 		retval = 1;
   4582 		goto out;
   4583 	}
   4584 
    4585 	/* Extract the shader code from the ELF object. */
   4586 	buffer_size = LLVMGetBufferSize(out_buffer);
   4587 	buffer_data = LLVMGetBufferStart(out_buffer);
   4588 
   4589 	ac_elf_read(buffer_data, buffer_size, binary);
   4590 
   4591 	/* Clean up */
   4592 	LLVMDisposeMemoryBuffer(out_buffer);
   4593 
   4594 out:
   4595 	return retval;
   4596 }
   4597 
   4598 static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
   4599 				   LLVMModuleRef llvm_module,
   4600 				   struct ac_shader_binary *binary,
   4601 				   struct ac_shader_config *config,
   4602 				   struct ac_shader_variant_info *shader_info,
   4603 				   gl_shader_stage stage,
   4604 				   bool dump_shader)
   4605 {
   4606 	if (dump_shader)
   4607 		LLVMDumpModule(llvm_module);
   4608 
   4609 	memset(binary, 0, sizeof(*binary));
   4610 	int v = ac_llvm_compile(llvm_module, binary, tm);
   4611 	if (v) {
   4612 		fprintf(stderr, "compile failed\n");
   4613 	}
   4614 
   4615 	if (dump_shader)
   4616 		fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
   4617 
   4618 	ac_shader_binary_read_config(binary, config, 0);
   4619 
   4620 	LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
   4621 	LLVMDisposeModule(llvm_module);
   4622 	LLVMContextDispose(ctx);
   4623 
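	/* Count the input VGPRs the hardware will load, based on which
	 * SPI_PS_INPUT_ADDR bits the compiled binary enables: barycentric
	 * pairs cost two VGPRs (pull model three), scalar inputs one each. */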
   4624 	if (stage == MESA_SHADER_FRAGMENT) {
   4625 		shader_info->num_input_vgprs = 0;
   4626 		if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
   4627 			shader_info->num_input_vgprs += 2;
   4628 		if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
   4629 			shader_info->num_input_vgprs += 2;
   4630 		if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
   4631 			shader_info->num_input_vgprs += 2;
   4632 		if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
   4633 			shader_info->num_input_vgprs += 3;
   4634 		if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
   4635 			shader_info->num_input_vgprs += 2;
   4636 		if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
   4637 			shader_info->num_input_vgprs += 2;
   4638 		if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
   4639 			shader_info->num_input_vgprs += 2;
   4640 		if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
   4641 			shader_info->num_input_vgprs += 1;
   4642 		if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
   4643 			shader_info->num_input_vgprs += 1;
   4644 		if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
   4645 			shader_info->num_input_vgprs += 1;
   4646 		if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
   4647 			shader_info->num_input_vgprs += 1;
   4648 		if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
   4649 			shader_info->num_input_vgprs += 1;
   4650 		if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
   4651 			shader_info->num_input_vgprs += 1;
   4652 		if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
   4653 			shader_info->num_input_vgprs += 1;
   4654 		if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
   4655 			shader_info->num_input_vgprs += 1;
   4656 		if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
   4657 			shader_info->num_input_vgprs += 1;
   4658 	}
   4659 	config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
   4660 
   4661 	/* +3 for scratch wave offset and VCC */
   4662 	config->num_sgprs = MAX2(config->num_sgprs,
   4663 	                         shader_info->num_input_sgprs + 3);
   4664 }
   4665 
   4666 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
   4667                            struct ac_shader_binary *binary,
   4668                            struct ac_shader_config *config,
   4669                            struct ac_shader_variant_info *shader_info,
   4670                            struct nir_shader *nir,
   4671                            const struct ac_nir_compiler_options *options,
   4672 			   bool dump_shader)
   4673 {
   4674 
   4675 	LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
   4676 	                                                     options);
   4677 
   4678 	ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader);
   4679 	switch (nir->stage) {
   4680 	case MESA_SHADER_COMPUTE:
   4681 		for (int i = 0; i < 3; ++i)
   4682 			shader_info->cs.block_size[i] = nir->info->cs.local_size[i];
   4683 		break;
   4684 	case MESA_SHADER_FRAGMENT:
   4685 		shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests;
   4686 		break;
   4687 	default:
   4688 		break;
   4689 	}
   4690 }
   4691