Home | History | Annotate | Download | only in radeonsi
      1 /*
      2  * Copyright 2016 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "si_shader_internal.h"
     25 #include "si_pipe.h"
     26 
     27 #include "gallivm/lp_bld_const.h"
     28 #include "gallivm/lp_bld_gather.h"
     29 #include "gallivm/lp_bld_flow.h"
     30 #include "gallivm/lp_bld_init.h"
     31 #include "gallivm/lp_bld_intr.h"
     32 #include "gallivm/lp_bld_misc.h"
     33 #include "gallivm/lp_bld_swizzle.h"
     34 #include "tgsi/tgsi_info.h"
     35 #include "tgsi/tgsi_parse.h"
     36 #include "util/u_math.h"
     37 #include "util/u_memory.h"
     38 #include "util/u_debug.h"
     39 
     40 #include <stdio.h>
     41 #include <llvm-c/Transforms/IPO.h>
     42 #include <llvm-c/Transforms/Scalar.h>
     43 
     44 enum si_llvm_calling_convention {
     45 	RADEON_LLVM_AMDGPU_VS = 87,
     46 	RADEON_LLVM_AMDGPU_GS = 88,
     47 	RADEON_LLVM_AMDGPU_PS = 89,
     48 	RADEON_LLVM_AMDGPU_CS = 90,
     49 	RADEON_LLVM_AMDGPU_HS = 93,
     50 };
     51 
     52 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
     53 {
     54 	char str[16];
     55 
     56 	snprintf(str, sizeof(str), "%i", value);
     57 	LLVMAddTargetDependentFunctionAttr(F, name, str);
     58 }
     59 
/* State handed to si_diagnostic_handler() through its context pointer. */
struct si_llvm_diagnostics {
	/* Callback that receives the formatted diagnostic messages. */
	struct pipe_debug_callback *debug;
	/* Set to 1 once an error-severity diagnostic has been reported. */
	unsigned retval;
};
     64 
     65 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
     66 {
     67 	struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
     68 	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
     69 	char *description = LLVMGetDiagInfoDescription(di);
     70 	const char *severity_str = NULL;
     71 
     72 	switch (severity) {
     73 	case LLVMDSError:
     74 		severity_str = "error";
     75 		break;
     76 	case LLVMDSWarning:
     77 		severity_str = "warning";
     78 		break;
     79 	case LLVMDSRemark:
     80 		severity_str = "remark";
     81 		break;
     82 	case LLVMDSNote:
     83 		severity_str = "note";
     84 		break;
     85 	default:
     86 		severity_str = "unknown";
     87 	}
     88 
     89 	pipe_debug_message(diag->debug, SHADER_INFO,
     90 			   "LLVM diagnostic (%s): %s", severity_str, description);
     91 
     92 	if (severity == LLVMDSError) {
     93 		diag->retval = 1;
     94 		fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
     95 	}
     96 
     97 	LLVMDisposeMessage(description);
     98 }
     99 
    100 /**
    101  * Compile an LLVM module to machine code.
    102  *
    103  * @returns 0 for success, 1 for failure
    104  */
    105 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
    106 			 LLVMTargetMachineRef tm,
    107 			 struct pipe_debug_callback *debug)
    108 {
    109 	struct si_llvm_diagnostics diag;
    110 	char *err;
    111 	LLVMContextRef llvm_ctx;
    112 	LLVMMemoryBufferRef out_buffer;
    113 	unsigned buffer_size;
    114 	const char *buffer_data;
    115 	LLVMBool mem_err;
    116 
    117 	diag.debug = debug;
    118 	diag.retval = 0;
    119 
    120 	/* Setup Diagnostic Handler*/
    121 	llvm_ctx = LLVMGetModuleContext(M);
    122 
    123 	LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
    124 
    125 	/* Compile IR*/
    126 	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
    127 								 &out_buffer);
    128 
    129 	/* Process Errors/Warnings */
    130 	if (mem_err) {
    131 		fprintf(stderr, "%s: %s", __FUNCTION__, err);
    132 		pipe_debug_message(debug, SHADER_INFO,
    133 				   "LLVM emit error: %s", err);
    134 		FREE(err);
    135 		diag.retval = 1;
    136 		goto out;
    137 	}
    138 
    139 	/* Extract Shader Code*/
    140 	buffer_size = LLVMGetBufferSize(out_buffer);
    141 	buffer_data = LLVMGetBufferStart(out_buffer);
    142 
    143 	if (!ac_elf_read(buffer_data, buffer_size, binary)) {
    144 		fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
    145 		diag.retval = 1;
    146 	}
    147 
    148 	/* Clean up */
    149 	LLVMDisposeMemoryBuffer(out_buffer);
    150 
    151 out:
    152 	if (diag.retval != 0)
    153 		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
    154 	return diag.retval;
    155 }
    156 
    157 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
    158 			  enum tgsi_opcode_type type)
    159 {
    160 	struct si_shader_context *ctx = si_shader_context(bld_base);
    161 
    162 	switch (type) {
    163 	case TGSI_TYPE_UNSIGNED:
    164 	case TGSI_TYPE_SIGNED:
    165 		return ctx->ac.i32;
    166 	case TGSI_TYPE_UNSIGNED64:
    167 	case TGSI_TYPE_SIGNED64:
    168 		return ctx->ac.i64;
    169 	case TGSI_TYPE_DOUBLE:
    170 		return ctx->ac.f64;
    171 	case TGSI_TYPE_UNTYPED:
    172 	case TGSI_TYPE_FLOAT:
    173 		return ctx->ac.f32;
    174 	default: break;
    175 	}
    176 	return 0;
    177 }
    178 
    179 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
    180 		     enum tgsi_opcode_type type, LLVMValueRef value)
    181 {
    182 	struct si_shader_context *ctx = si_shader_context(bld_base);
    183 	LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
    184 
    185 	if (dst_type)
    186 		return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
    187 	else
    188 		return value;
    189 }
    190 
    191 /**
    192  * Return a value that is equal to the given i32 \p index if it lies in [0,num)
    193  * or an undefined value in the same interval otherwise.
    194  */
    195 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
    196 				 LLVMValueRef index,
    197 				 unsigned num)
    198 {
    199 	LLVMBuilderRef builder = ctx->ac.builder;
    200 	LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
    201 	LLVMValueRef cc;
    202 
    203 	if (util_is_power_of_two(num)) {
    204 		index = LLVMBuildAnd(builder, index, c_max, "");
    205 	} else {
    206 		/* In theory, this MAX pattern should result in code that is
    207 		 * as good as the bit-wise AND above.
    208 		 *
    209 		 * In practice, LLVM generates worse code (at the time of
    210 		 * writing), because its value tracking is not strong enough.
    211 		 */
    212 		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
    213 		index = LLVMBuildSelect(builder, cc, index, c_max, "");
    214 	}
    215 
    216 	return index;
    217 }
    218 
    219 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
    220 				 LLVMValueRef value,
    221 				 unsigned swizzle_x,
    222 				 unsigned swizzle_y,
    223 				 unsigned swizzle_z,
    224 				 unsigned swizzle_w)
    225 {
    226 	struct si_shader_context *ctx = si_shader_context(bld_base);
    227 	LLVMValueRef swizzles[4];
    228 
    229 	swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
    230 	swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
    231 	swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
    232 	swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);
    233 
    234 	return LLVMBuildShuffleVector(ctx->ac.builder,
    235 				      value,
    236 				      LLVMGetUndef(LLVMTypeOf(value)),
    237 				      LLVMConstVector(swizzles, 4), "");
    238 }
    239 
    240 /**
    241  * Return the description of the array covering the given temporary register
    242  * index.
    243  */
    244 static unsigned
    245 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
    246 		  unsigned reg_index,
    247 		  const struct tgsi_ind_register *reg)
    248 {
    249 	struct si_shader_context *ctx = si_shader_context(bld_base);
    250 	unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
    251 	unsigned i;
    252 
    253 	if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
    254 		return reg->ArrayID;
    255 
    256 	for (i = 0; i < num_arrays; i++) {
    257 		const struct tgsi_array_info *array = &ctx->temp_arrays[i];
    258 
    259 		if (reg_index >= array->range.First && reg_index <= array->range.Last)
    260 			return i + 1;
    261 	}
    262 
    263 	return 0;
    264 }
    265 
    266 static struct tgsi_declaration_range
    267 get_array_range(struct lp_build_tgsi_context *bld_base,
    268 		unsigned File, unsigned reg_index,
    269 		const struct tgsi_ind_register *reg)
    270 {
    271 	struct si_shader_context *ctx = si_shader_context(bld_base);
    272 	struct tgsi_declaration_range range;
    273 
    274 	if (File == TGSI_FILE_TEMPORARY) {
    275 		unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
    276 		if (array_id)
    277 			return ctx->temp_arrays[array_id - 1].range;
    278 	}
    279 
    280 	range.First = 0;
    281 	range.Last = bld_base->info->file_max[File];
    282 	return range;
    283 }
    284 
    285 /**
    286  * For indirect registers, construct a pointer directly to the requested
    287  * element using getelementptr if possible.
    288  *
    289  * Returns NULL if the insertelement/extractelement fallback for array access
    290  * must be used.
    291  */
    292 static LLVMValueRef
    293 get_pointer_into_array(struct si_shader_context *ctx,
    294 		       unsigned file,
    295 		       unsigned swizzle,
    296 		       unsigned reg_index,
    297 		       const struct tgsi_ind_register *reg_indirect)
    298 {
    299 	unsigned array_id;
    300 	struct tgsi_array_info *array;
    301 	LLVMBuilderRef builder = ctx->ac.builder;
    302 	LLVMValueRef idxs[2];
    303 	LLVMValueRef index;
    304 	LLVMValueRef alloca;
    305 
    306 	if (file != TGSI_FILE_TEMPORARY)
    307 		return NULL;
    308 
    309 	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
    310 	if (!array_id)
    311 		return NULL;
    312 
    313 	alloca = ctx->temp_array_allocas[array_id - 1];
    314 	if (!alloca)
    315 		return NULL;
    316 
    317 	array = &ctx->temp_arrays[array_id - 1];
    318 
    319 	if (!(array->writemask & (1 << swizzle)))
    320 		return ctx->undef_alloca;
    321 
    322 	index = si_get_indirect_index(ctx, reg_indirect, 1,
    323 				      reg_index - ctx->temp_arrays[array_id - 1].range.First);
    324 
    325 	/* Ensure that the index is within a valid range, to guard against
    326 	 * VM faults and overwriting critical data (e.g. spilled resource
    327 	 * descriptors).
    328 	 *
    329 	 * TODO It should be possible to avoid the additional instructions
    330 	 * if LLVM is changed so that it guarantuees:
    331 	 * 1. the scratch space descriptor isolates the current wave (this
    332 	 *    could even save the scratch offset SGPR at the cost of an
    333 	 *    additional SALU instruction)
    334 	 * 2. the memory for allocas must be allocated at the _end_ of the
    335 	 *    scratch space (after spilled registers)
    336 	 */
    337 	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
    338 
    339 	index = LLVMBuildMul(
    340 		builder, index,
    341 		LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
    342 		"");
    343 	index = LLVMBuildAdd(
    344 		builder, index,
    345 		LLVMConstInt(ctx->i32,
    346 			     util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
    347 		"");
    348 	idxs[0] = ctx->i32_0;
    349 	idxs[1] = index;
    350 	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
    351 }
    352 
    353 LLVMValueRef
    354 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
    355 			 LLVMTypeRef type,
    356 			 LLVMValueRef ptr,
    357 			 LLVMValueRef ptr2)
    358 {
    359 	struct si_shader_context *ctx = si_shader_context(bld_base);
    360 	LLVMValueRef result;
    361 
    362 	result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
    363 
    364 	result = LLVMBuildInsertElement(ctx->ac.builder,
    365 					result,
    366 					ac_to_integer(&ctx->ac, ptr),
    367 					ctx->i32_0, "");
    368 	result = LLVMBuildInsertElement(ctx->ac.builder,
    369 					result,
    370 					ac_to_integer(&ctx->ac, ptr2),
    371 					ctx->i32_1, "");
    372 	return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
    373 }
    374 
    375 static LLVMValueRef
    376 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
    377 		 unsigned File, enum tgsi_opcode_type type,
    378 		 struct tgsi_declaration_range range,
    379 		 unsigned swizzle)
    380 {
    381 	struct si_shader_context *ctx = si_shader_context(bld_base);
    382 	unsigned i, size = range.Last - range.First + 1;
    383 	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
    384 	LLVMValueRef result = LLVMGetUndef(vec);
    385 
    386 	struct tgsi_full_src_register tmp_reg = {};
    387 	tmp_reg.Register.File = File;
    388 
    389 	for (i = 0; i < size; ++i) {
    390 		tmp_reg.Register.Index = i + range.First;
    391 		LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
    392 		result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
    393 			LLVMConstInt(ctx->i32, i, 0), "array_vector");
    394 	}
    395 	return result;
    396 }
    397 
    398 static LLVMValueRef
    399 load_value_from_array(struct lp_build_tgsi_context *bld_base,
    400 		      unsigned file,
    401 		      enum tgsi_opcode_type type,
    402 		      unsigned swizzle,
    403 		      unsigned reg_index,
    404 		      const struct tgsi_ind_register *reg_indirect)
    405 {
    406 	struct si_shader_context *ctx = si_shader_context(bld_base);
    407 	LLVMBuilderRef builder = ctx->ac.builder;
    408 	LLVMValueRef ptr;
    409 
    410 	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
    411 	if (ptr) {
    412 		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
    413 		if (tgsi_type_is_64bit(type)) {
    414 			LLVMValueRef ptr_hi, val_hi;
    415 			ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
    416 			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
    417 			val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
    418 						       val, val_hi);
    419 		}
    420 
    421 		return val;
    422 	} else {
    423 		struct tgsi_declaration_range range =
    424 			get_array_range(bld_base, file, reg_index, reg_indirect);
    425 		LLVMValueRef index =
    426 			si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
    427 		LLVMValueRef array =
    428 			emit_array_fetch(bld_base, file, type, range, swizzle);
    429 		return LLVMBuildExtractElement(builder, array, index, "");
    430 	}
    431 }
    432 
    433 static void
    434 store_value_to_array(struct lp_build_tgsi_context *bld_base,
    435 		     LLVMValueRef value,
    436 		     unsigned file,
    437 		     unsigned chan_index,
    438 		     unsigned reg_index,
    439 		     const struct tgsi_ind_register *reg_indirect)
    440 {
    441 	struct si_shader_context *ctx = si_shader_context(bld_base);
    442 	LLVMBuilderRef builder = ctx->ac.builder;
    443 	LLVMValueRef ptr;
    444 
    445 	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
    446 	if (ptr) {
    447 		LLVMBuildStore(builder, value, ptr);
    448 	} else {
    449 		unsigned i, size;
    450 		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
    451 		LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
    452 		LLVMValueRef array =
    453 			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
    454 		LLVMValueRef temp_ptr;
    455 
    456 		array = LLVMBuildInsertElement(builder, array, value, index, "");
    457 
    458 		size = range.Last - range.First + 1;
    459 		for (i = 0; i < size; ++i) {
    460 			switch(file) {
    461 			case TGSI_FILE_OUTPUT:
    462 				temp_ptr = ctx->outputs[i + range.First][chan_index];
    463 				break;
    464 
    465 			case TGSI_FILE_TEMPORARY:
    466 				if (range.First + i >= ctx->temps_count)
    467 					continue;
    468 				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
    469 				break;
    470 
    471 			default:
    472 				continue;
    473 			}
    474 			value = LLVMBuildExtractElement(builder, array,
    475 				LLVMConstInt(ctx->i32, i, 0), "");
    476 			LLVMBuildStore(builder, value, temp_ptr);
    477 		}
    478 	}
    479 }
    480 
    481 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
    482  * reload them at each use. This must be true if the shader is using
    483  * derivatives and KILL, because KILL can leave the WQM and then a lazy
    484  * input load isn't in the WQM anymore.
    485  */
    486 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
    487 {
    488 	struct si_shader_selector *sel = ctx->shader->selector;
    489 
    490 	return sel->info.uses_derivatives &&
    491 	       sel->info.uses_kill;
    492 }
    493 
    494 static LLVMValueRef
    495 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
    496 	       unsigned chan)
    497 {
    498 	struct si_shader_context *ctx = si_shader_context(bld_base);
    499 
    500 	assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
    501 	return ctx->outputs[index][chan];
    502 }
    503 
/**
 * Fetch the value of a TGSI source register.
 *
 * @param bld_base  TGSI build context
 * @param reg       source register to read
 * @param type      TGSI type the result is bitcast to
 * @param swizzle   channel to read, or ~0 to read all four channels
 *
 * @returns the channel value bitcast to \p type; a vector of all
 *          TGSI_NUM_CHANNELS channels when \p swizzle is ~0; or undef for
 *          register files not handled here and for temporaries beyond the
 *          allocated range
 */
LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				const struct tgsi_full_src_register *reg,
				enum tgsi_opcode_type type,
				unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef result = NULL, ptr, ptr2;

	/* Whole-register fetch: fetch each channel separately and gather
	 * the results into one vector.
	 */
	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(&ctx->gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	/* Indirectly addressed registers of any file go through the array
	 * access helper.
	 */
	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
				swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		/* Immediates are constants, so everything is folded with the
		 * LLVMConst* API and no instructions are emitted.
		 */
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			/* A 64-bit immediate occupies two consecutive channels. */
			result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
							ctx->i32_0);
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
							ctx->i32_1);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		/* Fragment shaders without preloading reload the input at
		 * each use; everything else reads the values stored in
		 * ctx->inputs.
		 */
		if (!si_preload_fs_inputs(ctx) &&
		    ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			/* Inputs are values, not pointers, so no loads are
			 * needed here.
			 */
			ptr = result;
			ptr2 = input[swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		/* Temporaries without allocated storage read as undef. */
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		/* Outputs can be read back from their storage. */
		ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		/* Register files not handled here yield undef. */
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
    602 
    603 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
    604 				       const struct tgsi_full_src_register *reg,
    605 				       enum tgsi_opcode_type type,
    606 				       unsigned swizzle)
    607 {
    608 	struct si_shader_context *ctx = si_shader_context(bld_base);
    609 	LLVMBuilderRef builder = ctx->ac.builder;
    610 	LLVMValueRef cval = ctx->system_values[reg->Register.Index];
    611 
    612 	if (tgsi_type_is_64bit(type)) {
    613 		LLVMValueRef lo, hi;
    614 
    615 		assert(swizzle == 0 || swizzle == 2);
    616 
    617 		lo = LLVMBuildExtractElement(
    618 			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
    619 		hi = LLVMBuildExtractElement(
    620 			builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
    621 
    622 		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
    623 						lo, hi);
    624 	}
    625 
    626 	if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
    627 		cval = LLVMBuildExtractElement(
    628 			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
    629 	} else {
    630 		assert(swizzle == 0);
    631 	}
    632 
    633 	return bitcast(bld_base, type, cval);
    634 }
    635 
/**
 * Handle a TGSI declaration by setting up the storage or values that
 * later fetches and stores of the declared registers rely on.
 *
 * Depending on the declared register file this allocates per-channel
 * allocas (address, temporary, output), records and possibly loads inputs,
 * loads system values, or declares compute memory. Other register files
 * are ignored.
 *
 * @param bld_base  TGSI build context
 * @param decl      declaration to process
 */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		/* One i32 alloca per channel of every declared address
		 * register.
		 */
		 unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				 ctx->addrs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->i32, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		/* The name stays empty unless DEBUG is defined. */
		char name[16] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		/* Number of channels covered by this declaration. */
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			/* ArrayID is 1-based; the context arrays are 0-based. */
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			/* Restrict the declared usage mask by the writemask
			 * already recorded for the array and store the result
			 * back.
			 */
			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			/* Only written channels take up space in the array. */
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 */
			if (array_size > 16 ||
			    !ctx->screen->llvm_has_working_vgpr_indexing) {
				array_alloca = lp_build_alloca_undef(&ctx->gallivm,
					LLVMArrayType(ctx->f32,
						      array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* The pointer table for all temporaries is allocated on the
		 * first temporary declaration, sized by the highest temporary
		 * index.
		 *
		 * NOTE(review): the MALLOC result is not checked before the
		 * table is written below.
		 */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* No array alloca: every channel gets its own f32
			 * alloca.
			 */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					lp_build_alloca_undef(&ctx->gallivm,
							      ctx->f32,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				ctx->i32_0,
				NULL
			};
			/* Next free element in the packed array alloca. */
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, "undef");
			}

			/* Written channels point at consecutive elements of
			 * the array alloca; unwritten channels share the
			 * dummy alloca.
			 */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = LLVMConstInt(ctx->i32, j, 0);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			/* Only act if an input loader is set and this slot
			 * has not been recorded as an input declaration yet.
			 */
			if (ctx->load_input &&
			    ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
				/* Record a copy of the declaration narrowed
				 * to this single register, with the semantic
				 * index advanced by the register's offset in
				 * the declared range.
				 */
				ctx->input_decls[idx] = *decl;
				ctx->input_decls[idx].Range.First = idx;
				ctx->input_decls[idx].Range.Last = idx;
				ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;

				/* Load right away unless this is a fragment
				 * shader that reloads its inputs at each use.
				 */
				if (si_preload_fs_inputs(ctx) ||
				    bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, &ctx->input_decls[idx],
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			si_load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		/* The name stays empty unless DEBUG is defined. */
		char name[16] = "";
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			/* Skip outputs whose storage already exists. */
			if (ctx->outputs[idx][0])
				continue;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "OUT%d.%c",
					 idx, "xyzw"[chan % 4]);
#endif
				ctx->outputs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, name);
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		si_declare_compute_memory(ctx, decl);
		break;

	default:
		/* Other register files need no setup here. */
		break;
	}
}
    814 
    815 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
    816 			const struct tgsi_full_instruction *inst,
    817 			const struct tgsi_opcode_info *info,
    818 			unsigned index,
    819 			LLVMValueRef dst[4])
    820 {
    821 	struct si_shader_context *ctx = si_shader_context(bld_base);
    822 	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
    823 	LLVMBuilderRef builder = ctx->ac.builder;
    824 	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
    825 	bool is_vec_store = false;
    826 	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
    827 
    828 	if (dst[0]) {
    829 		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
    830 		is_vec_store = (k == LLVMVectorTypeKind);
    831 	}
    832 
    833 	if (is_vec_store) {
    834 		LLVMValueRef values[4] = {};
    835 		uint32_t writemask = reg->Register.WriteMask;
    836 		while (writemask) {
    837 			unsigned chan = u_bit_scan(&writemask);
    838 			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
    839 			values[chan]  = LLVMBuildExtractElement(ctx->ac.builder,
    840 							dst[0], index, "");
    841 		}
    842 		bld_base->emit_store(bld_base, inst, info, index, values);
    843 		return;
    844 	}
    845 
    846 	uint32_t writemask = reg->Register.WriteMask;
    847 	while (writemask) {
    848 		unsigned chan_index = u_bit_scan(&writemask);
    849 		LLVMValueRef value = dst[chan_index];
    850 
    851 		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
    852 			continue;
    853 		if (inst->Instruction.Saturate)
    854 			value = ac_build_clamp(&ctx->ac, value);
    855 
    856 		if (reg->Register.File == TGSI_FILE_ADDRESS) {
    857 			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
    858 			LLVMBuildStore(builder, value, temp_ptr);
    859 			continue;
    860 		}
    861 
    862 		if (!tgsi_type_is_64bit(dtype))
    863 			value = ac_to_float(&ctx->ac, value);
    864 
    865 		if (reg->Register.Indirect) {
    866 			unsigned file = reg->Register.File;
    867 			unsigned reg_index = reg->Register.Index;
    868 			store_value_to_array(bld_base, value, file, chan_index,
    869 					     reg_index, &reg->Indirect);
    870 		} else {
    871 			switch(reg->Register.File) {
    872 			case TGSI_FILE_OUTPUT:
    873 				temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
    874 				if (tgsi_type_is_64bit(dtype))
    875 					temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
    876 				break;
    877 
    878 			case TGSI_FILE_TEMPORARY:
    879 			{
    880 				if (reg->Register.Index >= ctx->temps_count)
    881 					continue;
    882 
    883 				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
    884 				if (tgsi_type_is_64bit(dtype))
    885 					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
    886 
    887 				break;
    888 			}
    889 			default:
    890 				return;
    891 			}
    892 			if (!tgsi_type_is_64bit(dtype))
    893 				LLVMBuildStore(builder, value, temp_ptr);
    894 			else {
    895 				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
    896 								    LLVMVectorType(ctx->i32, 2), "");
    897 				LLVMValueRef val2;
    898 				value = LLVMBuildExtractElement(builder, ptr,
    899 								ctx->i32_0, "");
    900 				val2 = LLVMBuildExtractElement(builder, ptr,
    901 							       ctx->i32_1, "");
    902 
    903 				LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
    904 				LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
    905 			}
    906 		}
    907 	}
    908 }
    909 
    910 static int get_line(int pc)
    911 {
    912 	/* Subtract 1 so that the number shown is that of the corresponding
    913 	 * opcode in the TGSI dump, e.g. an if block has the same suffix as
    914 	 * the instruction number of the corresponding TGSI IF.
    915 	 */
    916 	return pc - 1;
    917 }
    918 
    919 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
    920 			 struct lp_build_tgsi_context *bld_base,
    921 			 struct lp_build_emit_data *emit_data)
    922 {
    923 	struct si_shader_context *ctx = si_shader_context(bld_base);
    924 	ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
    925 }
    926 
    927 static void brk_emit(const struct lp_build_tgsi_action *action,
    928 		     struct lp_build_tgsi_context *bld_base,
    929 		     struct lp_build_emit_data *emit_data)
    930 {
    931 	struct si_shader_context *ctx = si_shader_context(bld_base);
    932 	ac_build_break(&ctx->ac);
    933 }
    934 
    935 static void cont_emit(const struct lp_build_tgsi_action *action,
    936 		      struct lp_build_tgsi_context *bld_base,
    937 		      struct lp_build_emit_data *emit_data)
    938 {
    939 	struct si_shader_context *ctx = si_shader_context(bld_base);
    940 	ac_build_continue(&ctx->ac);
    941 }
    942 
    943 static void else_emit(const struct lp_build_tgsi_action *action,
    944 		      struct lp_build_tgsi_context *bld_base,
    945 		      struct lp_build_emit_data *emit_data)
    946 {
    947 	struct si_shader_context *ctx = si_shader_context(bld_base);
    948 	ac_build_else(&ctx->ac, get_line(bld_base->pc));
    949 }
    950 
    951 static void endif_emit(const struct lp_build_tgsi_action *action,
    952 		       struct lp_build_tgsi_context *bld_base,
    953 		       struct lp_build_emit_data *emit_data)
    954 {
    955 	struct si_shader_context *ctx = si_shader_context(bld_base);
    956 	ac_build_endif(&ctx->ac, get_line(bld_base->pc));
    957 }
    958 
    959 static void endloop_emit(const struct lp_build_tgsi_action *action,
    960 			 struct lp_build_tgsi_context *bld_base,
    961 			 struct lp_build_emit_data *emit_data)
    962 {
    963 	struct si_shader_context *ctx = si_shader_context(bld_base);
    964 	ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
    965 }
    966 
    967 static void if_emit(const struct lp_build_tgsi_action *action,
    968 		    struct lp_build_tgsi_context *bld_base,
    969 		    struct lp_build_emit_data *emit_data)
    970 {
    971 	struct si_shader_context *ctx = si_shader_context(bld_base);
    972 	ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
    973 }
    974 
    975 static void uif_emit(const struct lp_build_tgsi_action *action,
    976 		     struct lp_build_tgsi_context *bld_base,
    977 		     struct lp_build_emit_data *emit_data)
    978 {
    979 	struct si_shader_context *ctx = si_shader_context(bld_base);
    980 	ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
    981 }
    982 
    983 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
    984 			   const struct tgsi_full_immediate *imm)
    985 {
    986 	unsigned i;
    987 	struct si_shader_context *ctx = si_shader_context(bld_base);
    988 
    989 	for (i = 0; i < 4; ++i) {
    990 		ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
    991 				LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
    992 	}
    993 
    994 	ctx->imms_num++;
    995 }
    996 
    997 void si_llvm_context_init(struct si_shader_context *ctx,
    998 			  struct si_screen *sscreen,
    999 			  LLVMTargetMachineRef tm)
   1000 {
   1001 	struct lp_type type;
   1002 
   1003 	/* Initialize the gallivm object:
   1004 	 * We are only using the module, context, and builder fields of this struct.
   1005 	 * This should be enough for us to be able to pass our gallivm struct to the
   1006 	 * helper functions in the gallivm module.
   1007 	 */
   1008 	memset(ctx, 0, sizeof(*ctx));
   1009 	ctx->screen = sscreen;
   1010 	ctx->tm = tm;
   1011 
   1012 	ctx->gallivm.context = LLVMContextCreate();
   1013 	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
   1014 						ctx->gallivm.context);
   1015 	LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
   1016 
   1017 	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
   1018 	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
   1019 	LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
   1020 	LLVMDisposeTargetData(data_layout);
   1021 	LLVMDisposeMessage(data_layout_str);
   1022 
   1023 	bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
   1024 	enum ac_float_mode float_mode =
   1025 		unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
   1026 				AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
   1027 
   1028 	ctx->gallivm.builder = ac_create_builder(ctx->gallivm.context,
   1029 						 float_mode);
   1030 
   1031 	ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
   1032 			     sscreen->info.chip_class, sscreen->info.family);
   1033 	ctx->ac.module = ctx->gallivm.module;
   1034 	ctx->ac.builder = ctx->gallivm.builder;
   1035 
   1036 	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
   1037 
   1038 	type.floating = true;
   1039 	type.fixed = false;
   1040 	type.sign = true;
   1041 	type.norm = false;
   1042 	type.width = 32;
   1043 	type.length = 1;
   1044 
   1045 	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
   1046 	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
   1047 	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
   1048 	type.width *= 2;
   1049 	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
   1050 	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
   1051 	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
   1052 
   1053 	bld_base->soa = 1;
   1054 	bld_base->emit_swizzle = emit_swizzle;
   1055 	bld_base->emit_declaration = emit_declaration;
   1056 	bld_base->emit_immediate = emit_immediate;
   1057 
   1058 	/* metadata allowing 2.5 ULP */
   1059 	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->ac.context,
   1060 						       "fpmath", 6);
   1061 	LLVMValueRef arg = LLVMConstReal(ctx->ac.f32, 2.5);
   1062 	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->ac.context,
   1063 						     &arg, 1);
   1064 
   1065 	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   1066 	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   1067 	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   1068 	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
   1069 	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   1070 	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   1071 	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   1072 	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   1073 
   1074 	si_shader_context_init_alu(&ctx->bld_base);
   1075 	si_shader_context_init_mem(ctx);
   1076 
   1077 	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
   1078 	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
   1079 	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
   1080 	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
   1081 	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
   1082 	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
   1083 	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
   1084 	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
   1085 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
   1086 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
   1087 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
   1088 
   1089 	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
   1090 	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
   1091 }
   1092 
   1093 /* Set the context to a certain TGSI shader. Can be called repeatedly
   1094  * to change the shader. */
   1095 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
   1096 			      struct si_shader *shader)
   1097 {
   1098 	const struct tgsi_shader_info *info = NULL;
   1099 	const struct tgsi_token *tokens = NULL;
   1100 
   1101 	if (shader && shader->selector) {
   1102 		info = &shader->selector->info;
   1103 		tokens = shader->selector->tokens;
   1104 	}
   1105 
   1106 	ctx->shader = shader;
   1107 	ctx->type = info ? info->processor : -1;
   1108 	ctx->bld_base.info = info;
   1109 
   1110 	/* Clean up the old contents. */
   1111 	FREE(ctx->temp_arrays);
   1112 	ctx->temp_arrays = NULL;
   1113 	FREE(ctx->temp_array_allocas);
   1114 	ctx->temp_array_allocas = NULL;
   1115 
   1116 	FREE(ctx->imms);
   1117 	ctx->imms = NULL;
   1118 	ctx->imms_num = 0;
   1119 
   1120 	FREE(ctx->temps);
   1121 	ctx->temps = NULL;
   1122 	ctx->temps_count = 0;
   1123 
   1124 	if (!info || !tokens)
   1125 		return;
   1126 
   1127 	if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
   1128 		int size = info->array_max[TGSI_FILE_TEMPORARY];
   1129 
   1130 		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
   1131 		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
   1132 
   1133 		tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
   1134 				 ctx->temp_arrays);
   1135 	}
   1136 	if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
   1137 		int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
   1138 		ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
   1139 	}
   1140 
   1141 	/* Re-set these to start with a clean slate. */
   1142 	ctx->bld_base.num_instructions = 0;
   1143 	ctx->bld_base.pc = 0;
   1144 	memset(ctx->outputs, 0, sizeof(ctx->outputs));
   1145 
   1146 	ctx->bld_base.emit_store = si_llvm_emit_store;
   1147 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
   1148 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
   1149 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
   1150 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
   1151 	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
   1152 
   1153 	ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
   1154 	ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
   1155 	ctx->num_samplers = util_last_bit(info->samplers_declared);
   1156 	ctx->num_images = util_last_bit(info->images_declared);
   1157 }
   1158 
   1159 void si_llvm_create_func(struct si_shader_context *ctx,
   1160 			 const char *name,
   1161 			 LLVMTypeRef *return_types, unsigned num_return_elems,
   1162 			 LLVMTypeRef *ParamTypes, unsigned ParamCount)
   1163 {
   1164 	LLVMTypeRef main_fn_type, ret_type;
   1165 	LLVMBasicBlockRef main_fn_body;
   1166 	enum si_llvm_calling_convention call_conv;
   1167 	unsigned real_shader_type;
   1168 
   1169 	if (num_return_elems)
   1170 		ret_type = LLVMStructTypeInContext(ctx->ac.context,
   1171 						   return_types,
   1172 						   num_return_elems, true);
   1173 	else
   1174 		ret_type = ctx->voidt;
   1175 
   1176 	/* Setup the function */
   1177 	ctx->return_type = ret_type;
   1178 	main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
   1179 	ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
   1180 	main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context,
   1181 			ctx->main_fn, "main_body");
   1182 	LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body);
   1183 
   1184 	real_shader_type = ctx->type;
   1185 
   1186 	/* LS is merged into HS (TCS), and ES is merged into GS. */
   1187 	if (ctx->screen->info.chip_class >= GFX9) {
   1188 		if (ctx->shader->key.as_ls)
   1189 			real_shader_type = PIPE_SHADER_TESS_CTRL;
   1190 		else if (ctx->shader->key.as_es)
   1191 			real_shader_type = PIPE_SHADER_GEOMETRY;
   1192 	}
   1193 
   1194 	switch (real_shader_type) {
   1195 	case PIPE_SHADER_VERTEX:
   1196 	case PIPE_SHADER_TESS_EVAL:
   1197 		call_conv = RADEON_LLVM_AMDGPU_VS;
   1198 		break;
   1199 	case PIPE_SHADER_TESS_CTRL:
   1200 		call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
   1201 						  RADEON_LLVM_AMDGPU_VS;
   1202 		break;
   1203 	case PIPE_SHADER_GEOMETRY:
   1204 		call_conv = RADEON_LLVM_AMDGPU_GS;
   1205 		break;
   1206 	case PIPE_SHADER_FRAGMENT:
   1207 		call_conv = RADEON_LLVM_AMDGPU_PS;
   1208 		break;
   1209 	case PIPE_SHADER_COMPUTE:
   1210 		call_conv = RADEON_LLVM_AMDGPU_CS;
   1211 		break;
   1212 	default:
   1213 		unreachable("Unhandle shader type");
   1214 	}
   1215 
   1216 	LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
   1217 }
   1218 
   1219 void si_llvm_optimize_module(struct si_shader_context *ctx)
   1220 {
   1221 	struct gallivm_state *gallivm = &ctx->gallivm;
   1222 	const char *triple = LLVMGetTarget(gallivm->module);
   1223 	LLVMTargetLibraryInfoRef target_library_info;
   1224 
   1225 	/* Dump LLVM IR before any optimization passes */
   1226 	if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
   1227 	    si_can_dump_shader(ctx->screen, ctx->type))
   1228 		LLVMDumpModule(ctx->gallivm.module);
   1229 
   1230 	/* Create the pass manager */
   1231 	gallivm->passmgr = LLVMCreatePassManager();
   1232 
   1233 	target_library_info = gallivm_create_target_library_info(triple);
   1234 	LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
   1235 
   1236 	if (si_extra_shader_checks(ctx->screen, ctx->type))
   1237 		LLVMAddVerifierPass(gallivm->passmgr);
   1238 
   1239 	LLVMAddAlwaysInlinerPass(gallivm->passmgr);
   1240 
   1241 	/* This pass should eliminate all the load and store instructions */
   1242 	LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
   1243 
   1244 	/* Add some optimization passes */
   1245 	LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
   1246 	LLVMAddLICMPass(gallivm->passmgr);
   1247 	LLVMAddAggressiveDCEPass(gallivm->passmgr);
   1248 	LLVMAddCFGSimplificationPass(gallivm->passmgr);
   1249 #if HAVE_LLVM >= 0x0400
   1250 	/* This is recommended by the instruction combining pass. */
   1251 	LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr);
   1252 #endif
   1253 	LLVMAddInstructionCombiningPass(gallivm->passmgr);
   1254 
   1255 	/* Run the pass */
   1256 	LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
   1257 
   1258 	LLVMDisposeBuilder(ctx->ac.builder);
   1259 	LLVMDisposePassManager(gallivm->passmgr);
   1260 	gallivm_dispose_target_library_info(target_library_info);
   1261 }
   1262 
   1263 void si_llvm_dispose(struct si_shader_context *ctx)
   1264 {
   1265 	LLVMDisposeModule(ctx->gallivm.module);
   1266 	LLVMContextDispose(ctx->gallivm.context);
   1267 	FREE(ctx->temp_arrays);
   1268 	ctx->temp_arrays = NULL;
   1269 	FREE(ctx->temp_array_allocas);
   1270 	ctx->temp_array_allocas = NULL;
   1271 	FREE(ctx->temps);
   1272 	ctx->temps = NULL;
   1273 	ctx->temps_count = 0;
   1274 	FREE(ctx->imms);
   1275 	ctx->imms = NULL;
   1276 	ctx->imms_num = 0;
   1277 	ac_llvm_context_dispose(&ctx->ac);
   1278 }
   1279