      1 /*
      2  * Copyright 2017 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "si_shader_internal.h"
     25 #include "si_pipe.h"
     26 #include "sid.h"
     27 #include "gallivm/lp_bld_arit.h"
     28 #include "gallivm/lp_bld_gather.h"
     29 #include "gallivm/lp_bld_intr.h"
     30 #include "tgsi/tgsi_build.h"
     31 #include "tgsi/tgsi_parse.h"
     32 #include "tgsi/tgsi_util.h"
     33 
     34 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
     35 				struct lp_build_tgsi_context *bld_base,
     36 				struct lp_build_emit_data *emit_data);
     37 
     38 static const struct lp_build_tgsi_action tex_action;
     39 
     40 /**
     41  * Given a v8i32 resource descriptor for a buffer, extract the size of the
     42  * buffer in number of elements and return it as an i32.
     43  */
     44 static LLVMValueRef get_buffer_size(
     45 	struct lp_build_tgsi_context *bld_base,
     46 	LLVMValueRef descriptor)
     47 {
     48 	struct si_shader_context *ctx = si_shader_context(bld_base);
     49 	LLVMBuilderRef builder = ctx->ac.builder;
     50 	LLVMValueRef size =
     51 		LLVMBuildExtractElement(builder, descriptor,
     52 					LLVMConstInt(ctx->i32, 2, 0), "");
     53 
     54 	if (ctx->screen->info.chip_class == VI) {
     55 		/* On VI, the descriptor contains the size in bytes,
     56 		 * but TXQ must return the size in elements.
     57 		 * The stride is always non-zero for resources using TXQ.
     58 		 */
     59 		LLVMValueRef stride =
     60 			LLVMBuildExtractElement(builder, descriptor,
     61 						ctx->i32_1, "");
     62 		stride = LLVMBuildLShr(builder, stride,
     63 				       LLVMConstInt(ctx->i32, 16, 0), "");
     64 		stride = LLVMBuildAnd(builder, stride,
     65 				      LLVMConstInt(ctx->i32, 0x3FFF, 0), "");
     66 
     67 		size = LLVMBuildUDiv(builder, size, stride, "");
     68 	}
     69 
     70 	return size;
     71 }
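
         /* Worked example with hypothetical descriptor values (not taken from
          * the driver): the V# holds the size in dword2 and the stride in
          * dword1 bits [29:16].  For dword1 = (16 << 16) and dword2 = 256:
          *
          *   stride = (dword1 >> 16) & 0x3FFF    -> 16 bytes
          *   size   = dword2 / stride            -> 256 / 16 = 16 elements
          *
          * so on VI, TXQ reports 16 elements even though the descriptor
          * stores the size in bytes.
          */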
     72 
     73 static LLVMValueRef
     74 shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
     75 			 const struct tgsi_full_src_register *reg,
     76 			 bool ubo)
     77 {
     78 	LLVMValueRef index;
     79 
     80 	if (!reg->Register.Indirect) {
     81 		index = LLVMConstInt(ctx->i32, reg->Register.Index, false);
     82 	} else {
     83 		index = si_get_indirect_index(ctx, &reg->Indirect,
     84 					      1, reg->Register.Index);
     85 	}
     86 
     87 	if (ubo)
     88 		return ctx->abi.load_ubo(&ctx->abi, index);
     89 	else
     90 		return ctx->abi.load_ssbo(&ctx->abi, index, false);
     91 }
     92 
     93 static bool tgsi_is_array_image(unsigned target)
     94 {
     95 	return target == TGSI_TEXTURE_3D ||
     96 	       target == TGSI_TEXTURE_CUBE ||
     97 	       target == TGSI_TEXTURE_1D_ARRAY ||
     98 	       target == TGSI_TEXTURE_2D_ARRAY ||
     99 	       target == TGSI_TEXTURE_CUBE_ARRAY ||
    100 	       target == TGSI_TEXTURE_2D_ARRAY_MSAA;
    101 }
    102 
    103 /**
    104  * Given a 256-bit resource descriptor, force the DCC enable bit to off.
    105  *
    106  * At least on Tonga, executing image stores on images with DCC enabled and
     107  * non-trivial layouts can eventually lead to lockups. This can occur when an
    108  * application binds an image as read-only but then uses a shader that writes
    109  * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
    110  * program termination) in this case, but it doesn't cost much to be a bit
    111  * nicer: disabling DCC in the shader still leads to undefined results but
    112  * avoids the lockup.
    113  */
    114 static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
    115 				  LLVMValueRef rsrc)
    116 {
    117 	if (ctx->screen->info.chip_class <= CIK) {
    118 		return rsrc;
    119 	} else {
    120 		LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
    121 		LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
    122 		LLVMValueRef tmp;
    123 
    124 		tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
    125 		tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
    126 		return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
    127 	}
    128 }
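
         /* A sketch of the transform above, assuming the usual sid.h convention
          * that C_008F28_COMPRESSION_EN is the inverted field mask (all bits
          * set except COMPRESSION_EN):
          *
          *   dword6 &= C_008F28_COMPRESSION_EN;   -> DCC force-disabled
          *
          * Only dword6 of the v8i32 descriptor changes; the other seven
          * dwords pass through untouched.
          */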
    129 
    130 LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
    131 				LLVMValueRef list, LLVMValueRef index,
    132 				enum ac_descriptor_type desc_type, bool dcc_off)
    133 {
    134 	LLVMBuilderRef builder = ctx->ac.builder;
    135 	LLVMValueRef rsrc;
    136 
    137 	if (desc_type == AC_DESC_BUFFER) {
    138 		index = LLVMBuildMul(builder, index,
    139 				     LLVMConstInt(ctx->i32, 2, 0), "");
    140 		index = LLVMBuildAdd(builder, index,
    141 				     ctx->i32_1, "");
    142 		list = LLVMBuildPointerCast(builder, list,
    143 					    si_const_array(ctx->v4i32, 0), "");
    144 	} else {
    145 		assert(desc_type == AC_DESC_IMAGE);
    146 	}
    147 
    148 	rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
    149 	if (desc_type == AC_DESC_IMAGE && dcc_off)
    150 		rsrc = force_dcc_off(ctx, rsrc);
    151 	return rsrc;
    152 }
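
         /* Indexing sketch: the descriptor list is normally addressed in
          * 8-dword (v8i32) units, and buffer views live in dwords [4:7] of a
          * slot, so the list is re-cast to v4i32 units and the index doubled:
          *
          *   slot 3  ->  v4i32 index 3 * 2 + 1 = 7  ->  dwords 28..31,
          *
          * which are exactly dwords [4:7] of 8-dword slot 3.
          */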
    153 
    154 /**
    155  * Load the resource descriptor for \p image.
    156  */
    157 static void
    158 image_fetch_rsrc(
    159 	struct lp_build_tgsi_context *bld_base,
    160 	const struct tgsi_full_src_register *image,
    161 	bool is_store, unsigned target,
    162 	LLVMValueRef *rsrc)
    163 {
    164 	struct si_shader_context *ctx = si_shader_context(bld_base);
    165 	LLVMValueRef rsrc_ptr = LLVMGetParam(ctx->main_fn,
    166 					     ctx->param_samplers_and_images);
    167 	LLVMValueRef index;
    168 	bool dcc_off = is_store;
    169 
    170 	if (!image->Register.Indirect) {
    171 		const struct tgsi_shader_info *info = bld_base->info;
    172 		unsigned images_writemask = info->images_store |
    173 					    info->images_atomic;
    174 
    175 		index = LLVMConstInt(ctx->i32,
    176 				     si_get_image_slot(image->Register.Index), 0);
    177 
    178 		if (images_writemask & (1 << image->Register.Index))
    179 			dcc_off = true;
    180 	} else {
    181 		/* From the GL_ARB_shader_image_load_store extension spec:
    182 		 *
    183 		 *    If a shader performs an image load, store, or atomic
    184 		 *    operation using an image variable declared as an array,
    185 		 *    and if the index used to select an individual element is
    186 		 *    negative or greater than or equal to the size of the
    187 		 *    array, the results of the operation are undefined but may
    188 		 *    not lead to termination.
    189 		 */
    190 		index = si_get_bounded_indirect_index(ctx, &image->Indirect,
    191 						      image->Register.Index,
    192 						      ctx->num_images);
    193 		index = LLVMBuildSub(ctx->ac.builder,
    194 				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
    195 				     index, "");
    196 	}
    197 
    198 	if (image->Register.File != TGSI_FILE_IMAGE) {
    199 		/* Bindless descriptors are accessible from a different pair of
    200 		 * user SGPR indices.
    201 		 */
    202 		rsrc_ptr = LLVMGetParam(ctx->main_fn,
    203 					ctx->param_bindless_samplers_and_images);
    204 		index = lp_build_emit_fetch_src(bld_base, image,
    205 						TGSI_TYPE_UNSIGNED, 0);
    206 
    207 		/* For simplicity, bindless image descriptors use fixed
    208 		 * 16-dword slots for now.
    209 		 */
    210 		index = LLVMBuildMul(ctx->ac.builder, index,
    211 				     LLVMConstInt(ctx->i32, 2, 0), "");
    212 	}
    213 
    214 	*rsrc = si_load_image_desc(ctx, rsrc_ptr, index,
    215 				   target == TGSI_TEXTURE_BUFFER ? AC_DESC_BUFFER : AC_DESC_IMAGE,
    216 				   dcc_off);
    217 }
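
         /* The subtraction above mirrors si_get_image_slot(): image slots are
          * allocated in reverse order inside the combined list.  Assuming
          * SI_NUM_IMAGES == 16 (an assumption in this note, not taken from
          * this file), indirect image index i maps to descriptor slot 15 - i,
          * e.g. image 0 -> slot 15.  Bindless handles sit in fixed 16-dword
          * slots instead, hence the final index * 2 in 8-dword units.
          */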
    218 
    219 static LLVMValueRef image_fetch_coords(
    220 		struct lp_build_tgsi_context *bld_base,
    221 		const struct tgsi_full_instruction *inst,
    222 		unsigned src, LLVMValueRef desc)
    223 {
    224 	struct si_shader_context *ctx = si_shader_context(bld_base);
    225 	LLVMBuilderRef builder = ctx->ac.builder;
    226 	unsigned target = inst->Memory.Texture;
    227 	unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
    228 	LLVMValueRef coords[4];
    229 	LLVMValueRef tmp;
    230 	int chan;
    231 
    232 	for (chan = 0; chan < num_coords; ++chan) {
    233 		tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
    234 		tmp = ac_to_integer(&ctx->ac, tmp);
    235 		coords[chan] = tmp;
    236 	}
    237 
    238 	if (ctx->screen->info.chip_class >= GFX9) {
    239 		/* 1D textures are allocated and used as 2D on GFX9. */
    240 		if (target == TGSI_TEXTURE_1D) {
    241 			coords[1] = ctx->i32_0;
    242 			num_coords++;
    243 		} else if (target == TGSI_TEXTURE_1D_ARRAY) {
    244 			coords[2] = coords[1];
    245 			coords[1] = ctx->i32_0;
    246 			num_coords++;
    247 		} else if (target == TGSI_TEXTURE_2D) {
    248 			/* The hw can't bind a slice of a 3D image as a 2D
    249 			 * image, because it ignores BASE_ARRAY if the target
    250 			 * is 3D. The workaround is to read BASE_ARRAY and set
    251 			 * it as the 3rd address operand for all 2D images.
    252 			 */
    253 			LLVMValueRef first_layer, const5, mask;
    254 
    255 			const5 = LLVMConstInt(ctx->i32, 5, 0);
    256 			mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
    257 			first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
    258 			first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
    259 
    260 			coords[2] = first_layer;
    261 			num_coords++;
    262 		}
    263 	}
    264 
    265 	if (num_coords == 1)
    266 		return coords[0];
    267 
    268 	if (num_coords == 3) {
    269 		/* LLVM has difficulties lowering 3-element vectors. */
    270 		coords[3] = bld_base->uint_bld.undef;
    271 		num_coords = 4;
    272 	}
    273 
    274 	return lp_build_gather_values(&ctx->gallivm, coords, num_coords);
    275 }
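
         /* GFX9 remapping examples: a 1D array fetch supplying (x, layer) is
          * rewritten to the 2D-array form (x, 0, layer), and a plain 2D image
          * additionally gets BASE_ARRAY from descriptor dword5 as its third
          * coordinate so that a bound slice of a 3D image is addressed
          * correctly; 3-element results are then padded to 4 with undef.
          */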
    276 
    277 /**
    278  * Append the extra mode bits that are used by image load and store.
    279  */
    280 static void image_append_args(
    281 		struct si_shader_context *ctx,
    282 		struct lp_build_emit_data * emit_data,
    283 		unsigned target,
    284 		bool atomic,
    285 		bool force_glc)
    286 {
    287 	const struct tgsi_full_instruction *inst = emit_data->inst;
    288 	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
    289 	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
    290 	LLVMValueRef r128 = i1false;
    291 	LLVMValueRef da = tgsi_is_array_image(target) ? i1true : i1false;
    292 	LLVMValueRef glc =
    293 		force_glc ||
    294 		inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
    295 		i1true : i1false;
    296 	LLVMValueRef slc = i1false;
    297 	LLVMValueRef lwe = i1false;
    298 
    299 	if (atomic || (HAVE_LLVM <= 0x0309)) {
    300 		emit_data->args[emit_data->arg_count++] = r128;
    301 		emit_data->args[emit_data->arg_count++] = da;
    302 		if (!atomic) {
    303 			emit_data->args[emit_data->arg_count++] = glc;
    304 		}
    305 		emit_data->args[emit_data->arg_count++] = slc;
    306 		return;
    307 	}
    308 
    309 	/* HAVE_LLVM >= 0x0400 */
    310 	emit_data->args[emit_data->arg_count++] = glc;
    311 	emit_data->args[emit_data->arg_count++] = slc;
    312 	emit_data->args[emit_data->arg_count++] = lwe;
    313 	emit_data->args[emit_data->arg_count++] = da;
    314 }
    315 
    316 /**
    317  * Append the resource and indexing arguments for buffer intrinsics.
    318  *
    319  * \param rsrc the v4i32 buffer resource
    320  * \param index index into the buffer (stride-based)
    321  * \param offset byte offset into the buffer
    322  */
    323 static void buffer_append_args(
    324 		struct si_shader_context *ctx,
    325 		struct lp_build_emit_data *emit_data,
    326 		LLVMValueRef rsrc,
    327 		LLVMValueRef index,
    328 		LLVMValueRef offset,
    329 		bool atomic,
    330 		bool force_glc)
    331 {
    332 	const struct tgsi_full_instruction *inst = emit_data->inst;
    333 	LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
    334 	LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
    335 
    336 	emit_data->args[emit_data->arg_count++] = rsrc;
    337 	emit_data->args[emit_data->arg_count++] = index; /* vindex */
    338 	emit_data->args[emit_data->arg_count++] = offset; /* voffset */
    339 	if (!atomic) {
    340 		emit_data->args[emit_data->arg_count++] =
    341 			force_glc ||
    342 			inst->Memory.Qualifier & (TGSI_MEMORY_COHERENT | TGSI_MEMORY_VOLATILE) ?
    343 			i1true : i1false; /* glc */
    344 	}
    345 	emit_data->args[emit_data->arg_count++] = i1false; /* slc */
    346 }
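
         /* For reference, the argument tail this produces for a non-atomic
          * access is { rsrc, vindex, voffset, glc, slc }, with glc forced on
          * for coherent/volatile accesses (or when force_glc is set) and slc
          * always false; atomics simply omit glc.
          */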
    347 
    348 static void load_fetch_args(
    349 		struct lp_build_tgsi_context * bld_base,
    350 		struct lp_build_emit_data * emit_data)
    351 {
    352 	struct si_shader_context *ctx = si_shader_context(bld_base);
    353 	const struct tgsi_full_instruction * inst = emit_data->inst;
    354 	unsigned target = inst->Memory.Texture;
    355 	LLVMValueRef rsrc;
    356 
    357 	emit_data->dst_type = ctx->v4f32;
    358 
    359 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
    360 		   inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
    361 		LLVMValueRef offset;
    362 		LLVMValueRef tmp;
    363 
    364 		bool ubo = inst->Src[0].Register.File == TGSI_FILE_CONSTBUF;
    365 		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], ubo);
    366 
    367 		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
    368 		offset = ac_to_integer(&ctx->ac, tmp);
    369 
    370 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
    371 				   offset, false, false);
    372 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
    373 		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
    374 		LLVMValueRef coords;
    375 
    376 		image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
    377 		coords = image_fetch_coords(bld_base, inst, 1, rsrc);
    378 
    379 		if (target == TGSI_TEXTURE_BUFFER) {
    380 			buffer_append_args(ctx, emit_data, rsrc, coords,
    381 					   ctx->i32_0, false, false);
    382 		} else {
    383 			emit_data->args[0] = coords;
    384 			emit_data->args[1] = rsrc;
    385 			emit_data->args[2] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
    386 			emit_data->arg_count = 3;
    387 
    388 			image_append_args(ctx, emit_data, target, false, false);
    389 		}
    390 	}
    391 }
    392 
    393 static void load_emit_buffer(struct si_shader_context *ctx,
    394 			     struct lp_build_emit_data *emit_data,
    395 			     bool can_speculate, bool allow_smem)
    396 {
    397 	const struct tgsi_full_instruction *inst = emit_data->inst;
    398 	uint writemask = inst->Dst[0].Register.WriteMask;
    399 	uint count = util_last_bit(writemask);
    400 	LLVMValueRef *args = emit_data->args;
    401 
    402 	/* Don't use SMEM for shader buffer loads, because LLVM doesn't
    403 	 * select SMEM for SI.load.const with a non-constant offset, and
    404 	 * constant offsets practically don't exist with shader buffers.
    405 	 *
    406 	 * Also, SI.load.const doesn't use inst_offset when it's lowered
    407 	 * to VMEM, so we just end up with more VALU instructions in the end
    408 	 * and no benefit.
    409 	 *
    410 	 * TODO: Remove this line once LLVM can select SMEM with a non-constant
    411 	 *       offset, and can derive inst_offset when VMEM is selected.
    412 	 *       After that, si_memory_barrier should invalidate sL1 for shader
    413 	 *       buffers.
    414 	 */
    415 
    416 	assert(LLVMConstIntGetZExtValue(args[1]) == 0); /* vindex */
    417 	emit_data->output[emit_data->chan] =
    418 		ac_build_buffer_load(&ctx->ac, args[0], count, NULL,
    419 				     args[2], NULL, 0,
    420 				     LLVMConstIntGetZExtValue(args[3]),
    421 				     LLVMConstIntGetZExtValue(args[4]),
    422 				     can_speculate, allow_smem);
    423 }
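
         /* Reading aid for the call above (parameter names are descriptive,
          * not authoritative): args[0] is the rsrc, args[2] the voffset and
          * args[3]/args[4] the glc/slc flags; vindex (args[1]) must be the
          * constant 0 and is dropped, while soffset and inst_offset are
          * unused (NULL / 0).
          */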
    424 
    425 static LLVMValueRef get_memory_ptr(struct si_shader_context *ctx,
    426                                    const struct tgsi_full_instruction *inst,
    427                                    LLVMTypeRef type, int arg)
    428 {
    429 	LLVMBuilderRef builder = ctx->ac.builder;
    430 	LLVMValueRef offset, ptr;
    431 	int addr_space;
    432 
    433 	offset = lp_build_emit_fetch(&ctx->bld_base, inst, arg, 0);
    434 	offset = ac_to_integer(&ctx->ac, offset);
    435 
    436 	ptr = ctx->ac.lds;
    437 	ptr = LLVMBuildGEP(builder, ptr, &offset, 1, "");
    438 	addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
    439 	ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(type, addr_space), "");
    440 
    441 	return ptr;
    442 }
    443 
    444 static void load_emit_memory(
    445 		struct si_shader_context *ctx,
    446 		struct lp_build_emit_data *emit_data)
    447 {
    448 	const struct tgsi_full_instruction *inst = emit_data->inst;
    449 	unsigned writemask = inst->Dst[0].Register.WriteMask;
    450 	LLVMValueRef channels[4], ptr, derived_ptr, index;
    451 	int chan;
    452 
    453 	ptr = get_memory_ptr(ctx, inst, ctx->f32, 1);
    454 
    455 	for (chan = 0; chan < 4; ++chan) {
    456 		if (!(writemask & (1 << chan))) {
    457 			channels[chan] = LLVMGetUndef(ctx->f32);
    458 			continue;
    459 		}
    460 
    461 		index = LLVMConstInt(ctx->i32, chan, 0);
    462 		derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
    463 		channels[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
    464 	}
    465 	emit_data->output[emit_data->chan] = lp_build_gather_values(&ctx->gallivm, channels, 4);
    466 }
    467 
    468 /**
    469  * Return true if the memory accessed by a LOAD or STORE instruction is
    470  * read-only or write-only, respectively.
    471  *
    472  * \param shader_buffers_reverse_access_mask
    473  *	For LOAD, set this to (store | atomic) slot usage in the shader.
    474  *	For STORE, set this to (load | atomic) slot usage in the shader.
    475  * \param images_reverse_access_mask  Same as above, but for images.
    476  */
    477 static bool is_oneway_access_only(const struct tgsi_full_instruction *inst,
    478 				  const struct tgsi_shader_info *info,
    479 				  unsigned shader_buffers_reverse_access_mask,
    480 				  unsigned images_reverse_access_mask)
    481 {
    482 	/* RESTRICT means NOALIAS.
    483 	 * If there are no writes, we can assume the accessed memory is read-only.
    484 	 * If there are no reads, we can assume the accessed memory is write-only.
    485 	 */
    486 	if (inst->Memory.Qualifier & TGSI_MEMORY_RESTRICT) {
    487 		unsigned reverse_access_mask;
    488 
    489 		if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
    490 			reverse_access_mask = shader_buffers_reverse_access_mask;
    491 		} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
    492 			reverse_access_mask = info->images_buffers &
    493 					      images_reverse_access_mask;
    494 		} else {
    495 			reverse_access_mask = ~info->images_buffers &
    496 					      images_reverse_access_mask;
    497 		}
    498 
    499 		if (inst->Src[0].Register.Indirect) {
    500 			if (!reverse_access_mask)
    501 				return true;
    502 		} else {
    503 			if (!(reverse_access_mask &
    504 			      (1u << inst->Src[0].Register.Index)))
    505 				return true;
    506 		}
    507 	}
    508 
    509 	/* If there are no buffer writes (for both shader buffers & image
    510 	 * buffers), it implies that buffer memory is read-only.
    511 	 * If there are no buffer reads (for both shader buffers & image
    512 	 * buffers), it implies that buffer memory is write-only.
    513 	 *
    514 	 * Same for the case when there are no writes/reads for non-buffer
    515 	 * images.
    516 	 */
    517 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
    518 	    (inst->Memory.Texture == TGSI_TEXTURE_BUFFER &&
    519 	     (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
    520 	      tgsi_is_bindless_image_file(inst->Src[0].Register.File)))) {
    521 		if (!shader_buffers_reverse_access_mask &&
    522 		    !(info->images_buffers & images_reverse_access_mask))
    523 			return true;
    524 	} else {
    525 		if (!(~info->images_buffers & images_reverse_access_mask))
    526 			return true;
    527 	}
    528 	return false;
    529 }
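
         /* Example of the RESTRICT fast path: for a LOAD from shader buffer
          * slot 2, the caller passes the (store | atomic) slot mask.  If bit 2
          * is clear, nothing in the shader ever writes that buffer, so the
          * access is read-only and the load may be speculated and reordered.
          */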
    530 
    531 static void load_emit(
    532 		const struct lp_build_tgsi_action *action,
    533 		struct lp_build_tgsi_context *bld_base,
    534 		struct lp_build_emit_data *emit_data)
    535 {
    536 	struct si_shader_context *ctx = si_shader_context(bld_base);
    537 	LLVMBuilderRef builder = ctx->ac.builder;
    538 	const struct tgsi_full_instruction * inst = emit_data->inst;
    539 	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
    540 	char intrinsic_name[64];
    541 	bool can_speculate = false;
    542 
    543 	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
    544 		load_emit_memory(ctx, emit_data);
    545 		return;
    546 	}
    547 
    548 	if (inst->Src[0].Register.File == TGSI_FILE_CONSTBUF) {
    549 		load_emit_buffer(ctx, emit_data, true, true);
    550 		return;
    551 	}
    552 
    553 	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
    554 		ac_build_waitcnt(&ctx->ac, VM_CNT);
    555 
    556 	can_speculate = !(inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE) &&
    557 			  is_oneway_access_only(inst, info,
    558 						info->shader_buffers_store |
    559 						info->shader_buffers_atomic,
    560 						info->images_store |
    561 						info->images_atomic);
    562 
    563 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
    564 		load_emit_buffer(ctx, emit_data, can_speculate, false);
    565 		return;
    566 	}
    567 
    568 	if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
    569 		emit_data->output[emit_data->chan] =
    570 			lp_build_intrinsic(
    571 				builder, "llvm.amdgcn.buffer.load.format.v4f32", emit_data->dst_type,
    572 				emit_data->args, emit_data->arg_count,
    573 				ac_get_load_intr_attribs(can_speculate));
    574 	} else {
    575 		ac_get_image_intr_name("llvm.amdgcn.image.load",
    576 				       emit_data->dst_type,		/* vdata */
    577 				       LLVMTypeOf(emit_data->args[0]), /* coords */
    578 				       LLVMTypeOf(emit_data->args[1]), /* rsrc */
    579 				       intrinsic_name, sizeof(intrinsic_name));
    580 
    581 		emit_data->output[emit_data->chan] =
    582 			lp_build_intrinsic(
    583 				builder, intrinsic_name, emit_data->dst_type,
    584 				emit_data->args, emit_data->arg_count,
    585 				ac_get_load_intr_attribs(can_speculate));
    586 	}
    587 }
    588 
    589 static void store_fetch_args(
    590 		struct lp_build_tgsi_context * bld_base,
    591 		struct lp_build_emit_data * emit_data)
    592 {
    593 	struct si_shader_context *ctx = si_shader_context(bld_base);
    594 	const struct tgsi_full_instruction * inst = emit_data->inst;
    595 	struct tgsi_full_src_register memory;
    596 	LLVMValueRef chans[4];
    597 	LLVMValueRef data;
    598 	LLVMValueRef rsrc;
    599 	unsigned chan;
    600 
    601 	emit_data->dst_type = ctx->voidt;
    602 
    603 	for (chan = 0; chan < 4; ++chan) {
    604 		chans[chan] = lp_build_emit_fetch(bld_base, inst, 1, chan);
    605 	}
    606 	data = lp_build_gather_values(&ctx->gallivm, chans, 4);
    607 
    608 	emit_data->args[emit_data->arg_count++] = data;
    609 
    610 	memory = tgsi_full_src_register_from_dst(&inst->Dst[0]);
    611 
    612 	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
    613 		LLVMValueRef offset;
    614 		LLVMValueRef tmp;
    615 
    616 		rsrc = shader_buffer_fetch_rsrc(ctx, &memory, false);
    617 
    618 		tmp = lp_build_emit_fetch(bld_base, inst, 0, 0);
    619 		offset = ac_to_integer(&ctx->ac, tmp);
    620 
    621 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
    622 				   offset, false, false);
    623 	} else if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE ||
    624 		   tgsi_is_bindless_image_file(inst->Dst[0].Register.File)) {
    625 		unsigned target = inst->Memory.Texture;
    626 		LLVMValueRef coords;
    627 
    628 		/* 8bit/16bit TC L1 write corruption bug on SI.
    629 		 * All store opcodes not aligned to a dword are affected.
    630 		 *
    631 		 * The only way to get unaligned stores in radeonsi is through
    632 		 * shader images.
    633 		 */
    634 		bool force_glc = ctx->screen->info.chip_class == SI;
    635 
    636 		image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
    637 		coords = image_fetch_coords(bld_base, inst, 0, rsrc);
    638 
    639 		if (target == TGSI_TEXTURE_BUFFER) {
    640 			buffer_append_args(ctx, emit_data, rsrc, coords,
    641 					   ctx->i32_0, false, force_glc);
    642 		} else {
    643 			emit_data->args[1] = coords;
    644 			emit_data->args[2] = rsrc;
    645 			emit_data->args[3] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
    646 			emit_data->arg_count = 4;
    647 
    648 			image_append_args(ctx, emit_data, target, false, force_glc);
    649 		}
    650 	}
    651 }
    652 
    653 static void store_emit_buffer(
    654 		struct si_shader_context *ctx,
    655 		struct lp_build_emit_data *emit_data,
    656 		bool writeonly_memory)
    657 {
    658 	const struct tgsi_full_instruction *inst = emit_data->inst;
    659 	LLVMBuilderRef builder = ctx->ac.builder;
    660 	LLVMValueRef base_data = emit_data->args[0];
    661 	LLVMValueRef base_offset = emit_data->args[3];
    662 	unsigned writemask = inst->Dst[0].Register.WriteMask;
    663 
    664 	while (writemask) {
    665 		int start, count;
    666 		const char *intrinsic_name;
    667 		LLVMValueRef data;
    668 		LLVMValueRef offset;
    669 		LLVMValueRef tmp;
    670 
    671 		u_bit_scan_consecutive_range(&writemask, &start, &count);
    672 
    673 		/* Due to an LLVM limitation, split 3-element writes
    674 		 * into a 2-element and a 1-element write. */
    675 		if (count == 3) {
    676 			writemask |= 1 << (start + 2);
    677 			count = 2;
    678 		}
    679 
    680 		if (count == 4) {
    681 			data = base_data;
    682 			intrinsic_name = "llvm.amdgcn.buffer.store.v4f32";
    683 		} else if (count == 2) {
    684 			LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
    685 
    686 			tmp = LLVMBuildExtractElement(
    687 				builder, base_data,
    688 				LLVMConstInt(ctx->i32, start, 0), "");
    689 			data = LLVMBuildInsertElement(
    690 				builder, LLVMGetUndef(v2f32), tmp,
    691 				ctx->i32_0, "");
    692 
    693 			tmp = LLVMBuildExtractElement(
    694 				builder, base_data,
    695 				LLVMConstInt(ctx->i32, start + 1, 0), "");
    696 			data = LLVMBuildInsertElement(
    697 				builder, data, tmp, ctx->i32_1, "");
    698 
    699 			intrinsic_name = "llvm.amdgcn.buffer.store.v2f32";
    700 		} else {
    701 			assert(count == 1);
    702 			data = LLVMBuildExtractElement(
    703 				builder, base_data,
    704 				LLVMConstInt(ctx->i32, start, 0), "");
    705 			intrinsic_name = "llvm.amdgcn.buffer.store.f32";
    706 		}
    707 
    708 		offset = base_offset;
    709 		if (start != 0) {
    710 			offset = LLVMBuildAdd(
    711 				builder, offset,
    712 				LLVMConstInt(ctx->i32, start * 4, 0), "");
    713 		}
    714 
    715 		emit_data->args[0] = data;
    716 		emit_data->args[3] = offset;
    717 
    718 		lp_build_intrinsic(
    719 			builder, intrinsic_name, emit_data->dst_type,
    720 			emit_data->args, emit_data->arg_count,
    721 			ac_get_store_intr_attribs(writeonly_memory));
    722 	}
    723 }
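
         /* Worked example of the writemask split: WriteMask = 0b0111 (xyz)
          * scans as start = 0, count = 3; the count == 3 case emits a v2f32
          * store of {x, y} at base_offset and re-queues bit 2, so the next
          * iteration emits an f32 store of z at base_offset + 8.  A full xyzw
          * mask goes out as a single v4f32 store.
          */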
    724 
    725 static void store_emit_memory(
    726 		struct si_shader_context *ctx,
    727 		struct lp_build_emit_data *emit_data)
    728 {
    729 	const struct tgsi_full_instruction *inst = emit_data->inst;
    730 	LLVMBuilderRef builder = ctx->ac.builder;
    731 	unsigned writemask = inst->Dst[0].Register.WriteMask;
    732 	LLVMValueRef ptr, derived_ptr, data, index;
    733 	int chan;
    734 
    735 	ptr = get_memory_ptr(ctx, inst, ctx->f32, 0);
    736 
    737 	for (chan = 0; chan < 4; ++chan) {
    738 		if (!(writemask & (1 << chan))) {
    739 			continue;
    740 		}
    741 		data = lp_build_emit_fetch(&ctx->bld_base, inst, 1, chan);
    742 		index = LLVMConstInt(ctx->i32, chan, 0);
    743 		derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
    744 		LLVMBuildStore(builder, data, derived_ptr);
    745 	}
    746 }
    747 
    748 static void store_emit(
    749 		const struct lp_build_tgsi_action *action,
    750 		struct lp_build_tgsi_context *bld_base,
    751 		struct lp_build_emit_data *emit_data)
    752 {
    753 	struct si_shader_context *ctx = si_shader_context(bld_base);
    754 	LLVMBuilderRef builder = ctx->ac.builder;
    755 	const struct tgsi_full_instruction * inst = emit_data->inst;
    756 	const struct tgsi_shader_info *info = &ctx->shader->selector->info;
    757 	unsigned target = inst->Memory.Texture;
    758 	char intrinsic_name[64];
    759 	bool writeonly_memory = false;
    760 
    761 	if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) {
    762 		store_emit_memory(ctx, emit_data);
    763 		return;
    764 	}
    765 
    766 	if (inst->Memory.Qualifier & TGSI_MEMORY_VOLATILE)
    767 		ac_build_waitcnt(&ctx->ac, VM_CNT);
    768 
    769 	writeonly_memory = is_oneway_access_only(inst, info,
    770 						 info->shader_buffers_load |
    771 						 info->shader_buffers_atomic,
    772 						 info->images_load |
    773 						 info->images_atomic);
    774 
    775 	if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) {
    776 		store_emit_buffer(ctx, emit_data, writeonly_memory);
    777 		return;
    778 	}
    779 
    780 	if (target == TGSI_TEXTURE_BUFFER) {
    781 		emit_data->output[emit_data->chan] = lp_build_intrinsic(
    782 			builder, "llvm.amdgcn.buffer.store.format.v4f32",
    783 			emit_data->dst_type, emit_data->args,
    784 			emit_data->arg_count,
    785 			ac_get_store_intr_attribs(writeonly_memory));
    786 	} else {
    787 		ac_get_image_intr_name("llvm.amdgcn.image.store",
    788 				       LLVMTypeOf(emit_data->args[0]), /* vdata */
    789 				       LLVMTypeOf(emit_data->args[1]), /* coords */
    790 				       LLVMTypeOf(emit_data->args[2]), /* rsrc */
    791 				       intrinsic_name, sizeof(intrinsic_name));
    792 
    793 		emit_data->output[emit_data->chan] =
    794 			lp_build_intrinsic(
    795 				builder, intrinsic_name, emit_data->dst_type,
    796 				emit_data->args, emit_data->arg_count,
    797 				ac_get_store_intr_attribs(writeonly_memory));
    798 	}
    799 }
    800 
    801 static void atomic_fetch_args(
    802 		struct lp_build_tgsi_context * bld_base,
    803 		struct lp_build_emit_data * emit_data)
    804 {
    805 	struct si_shader_context *ctx = si_shader_context(bld_base);
    806 	const struct tgsi_full_instruction * inst = emit_data->inst;
    807 	LLVMValueRef data1, data2;
    808 	LLVMValueRef rsrc;
    809 	LLVMValueRef tmp;
    810 
    811 	emit_data->dst_type = ctx->f32;
    812 
    813 	tmp = lp_build_emit_fetch(bld_base, inst, 2, 0);
    814 	data1 = ac_to_integer(&ctx->ac, tmp);
    815 
    816 	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
    817 		tmp = lp_build_emit_fetch(bld_base, inst, 3, 0);
    818 		data2 = ac_to_integer(&ctx->ac, tmp);
    819 	}
    820 
    821 	/* The llvm.amdgcn.image/buffer.atomic.cmpswap intrinsics use the hardware
    822 	 * order of arguments, which is reversed relative to TGSI (and GLSL).
    823 	 */
    824 	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
    825 		emit_data->args[emit_data->arg_count++] = data2;
    826 	emit_data->args[emit_data->arg_count++] = data1;
    827 
    828 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
    829 		LLVMValueRef offset;
    830 
    831 		rsrc = shader_buffer_fetch_rsrc(ctx, &inst->Src[0], false);
    832 
    833 		tmp = lp_build_emit_fetch(bld_base, inst, 1, 0);
    834 		offset = ac_to_integer(&ctx->ac, tmp);
    835 
    836 		buffer_append_args(ctx, emit_data, rsrc, ctx->i32_0,
    837 				   offset, true, false);
    838 	} else if (inst->Src[0].Register.File == TGSI_FILE_IMAGE ||
    839 		   tgsi_is_bindless_image_file(inst->Src[0].Register.File)) {
    840 		unsigned target = inst->Memory.Texture;
    841 		LLVMValueRef coords;
    842 
    843 		image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
    844 		coords = image_fetch_coords(bld_base, inst, 1, rsrc);
    845 
    846 		if (target == TGSI_TEXTURE_BUFFER) {
    847 			buffer_append_args(ctx, emit_data, rsrc, coords,
    848 					   ctx->i32_0, true, false);
    849 		} else {
    850 			emit_data->args[emit_data->arg_count++] = coords;
    851 			emit_data->args[emit_data->arg_count++] = rsrc;
    852 
    853 			image_append_args(ctx, emit_data, target, true, false);
    854 		}
    855 	}
    856 }
    857 
    858 static void atomic_emit_memory(struct si_shader_context *ctx,
    859                                struct lp_build_emit_data *emit_data) {
    860 	LLVMBuilderRef builder = ctx->ac.builder;
    861 	const struct tgsi_full_instruction * inst = emit_data->inst;
    862 	LLVMValueRef ptr, result, arg;
    863 
    864 	ptr = get_memory_ptr(ctx, inst, ctx->i32, 1);
    865 
    866 	arg = lp_build_emit_fetch(&ctx->bld_base, inst, 2, 0);
    867 	arg = ac_to_integer(&ctx->ac, arg);
    868 
    869 	if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
    870 		LLVMValueRef new_data;
    871 		new_data = lp_build_emit_fetch(&ctx->bld_base,
    872 		                               inst, 3, 0);
    873 
    874 		new_data = ac_to_integer(&ctx->ac, new_data);
    875 
    876 		result = LLVMBuildAtomicCmpXchg(builder, ptr, arg, new_data,
    877 		                       LLVMAtomicOrderingSequentiallyConsistent,
    878 		                       LLVMAtomicOrderingSequentiallyConsistent,
    879 		                       false);
    880 
    881 		result = LLVMBuildExtractValue(builder, result, 0, "");
    882 	} else {
    883 		LLVMAtomicRMWBinOp op;
    884 
    885 		switch(inst->Instruction.Opcode) {
    886 			case TGSI_OPCODE_ATOMUADD:
    887 				op = LLVMAtomicRMWBinOpAdd;
    888 				break;
    889 			case TGSI_OPCODE_ATOMXCHG:
    890 				op = LLVMAtomicRMWBinOpXchg;
    891 				break;
    892 			case TGSI_OPCODE_ATOMAND:
    893 				op = LLVMAtomicRMWBinOpAnd;
    894 				break;
    895 			case TGSI_OPCODE_ATOMOR:
    896 				op = LLVMAtomicRMWBinOpOr;
    897 				break;
    898 			case TGSI_OPCODE_ATOMXOR:
    899 				op = LLVMAtomicRMWBinOpXor;
    900 				break;
    901 			case TGSI_OPCODE_ATOMUMIN:
    902 				op = LLVMAtomicRMWBinOpUMin;
    903 				break;
    904 			case TGSI_OPCODE_ATOMUMAX:
    905 				op = LLVMAtomicRMWBinOpUMax;
    906 				break;
    907 			case TGSI_OPCODE_ATOMIMIN:
    908 				op = LLVMAtomicRMWBinOpMin;
    909 				break;
    910 			case TGSI_OPCODE_ATOMIMAX:
    911 				op = LLVMAtomicRMWBinOpMax;
    912 				break;
    913 			default:
    914 				unreachable("unknown atomic opcode");
    915 		}
    916 
    917 		result = LLVMBuildAtomicRMW(builder, op, ptr, arg,
    918 		                       LLVMAtomicOrderingSequentiallyConsistent,
    919 		                       false);
    920 	}
    921 	emit_data->output[emit_data->chan] = LLVMBuildBitCast(builder, result, emit_data->dst_type, "");
    922 }
    923 
    924 static void atomic_emit(
    925 		const struct lp_build_tgsi_action *action,
    926 		struct lp_build_tgsi_context *bld_base,
    927 		struct lp_build_emit_data *emit_data)
    928 {
    929 	struct si_shader_context *ctx = si_shader_context(bld_base);
    930 	LLVMBuilderRef builder = ctx->ac.builder;
    931 	const struct tgsi_full_instruction * inst = emit_data->inst;
    932 	char intrinsic_name[40];
    933 	LLVMValueRef tmp;
    934 
    935 	if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) {
    936 		atomic_emit_memory(ctx, emit_data);
    937 		return;
    938 	}
    939 
    940 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER ||
    941 	    inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
    942 		snprintf(intrinsic_name, sizeof(intrinsic_name),
    943 			 "llvm.amdgcn.buffer.atomic.%s", action->intr_name);
    944 	} else {
    945 		LLVMValueRef coords;
    946 		char coords_type[8];
    947 
    948 		if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
    949 			coords = emit_data->args[2];
    950 		else
    951 			coords = emit_data->args[1];
    952 
    953 		ac_build_type_name_for_intr(LLVMTypeOf(coords), coords_type, sizeof(coords_type));
    954 		snprintf(intrinsic_name, sizeof(intrinsic_name),
    955 			 "llvm.amdgcn.image.atomic.%s.%s",
    956 			 action->intr_name, coords_type);
    957 	}
    958 
    959 	tmp = lp_build_intrinsic(
    960 		builder, intrinsic_name, ctx->i32,
    961 		emit_data->args, emit_data->arg_count, 0);
    962 	emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
    963 }
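
         /* Name construction sketch, assuming the TGSI action table registers
          * intr_name = "add" for ATOMUADD (an assumption; the table is not in
          * this file):
          *
          *   buffer / buffer image:        "llvm.amdgcn.buffer.atomic.add"
          *   2D array image, v4i32 coords: "llvm.amdgcn.image.atomic.add.v4i32"
          */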
    964 
    965 static void set_tex_fetch_args(struct si_shader_context *ctx,
    966 			       struct lp_build_emit_data *emit_data,
    967 			       unsigned target,
    968 			       LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
    969 			       LLVMValueRef *param, unsigned count,
    970 			       unsigned dmask)
    971 {
    972 	struct ac_image_args args = {};
    973 
     974 	/* Pad to a power-of-two vector length. */
    975 	while (count < util_next_power_of_two(count))
    976 		param[count++] = LLVMGetUndef(ctx->i32);
    977 
    978 	if (count > 1)
    979 		args.addr = lp_build_gather_values(&ctx->gallivm, param, count);
    980 	else
    981 		args.addr = param[0];
    982 
    983 	args.resource = res_ptr;
    984 	args.sampler = samp_ptr;
    985 	args.dmask = dmask;
    986 	args.unorm = target == TGSI_TEXTURE_RECT ||
    987 		     target == TGSI_TEXTURE_SHADOWRECT;
    988 	args.da = tgsi_is_array_sampler(target);
    989 
    990 	/* Ugly, but we seem to have no other choice right now. */
    991 	STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args));
    992 	memcpy(emit_data->args, &args, sizeof(args));
    993 }
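
         /* Padding example: a 3-component address becomes a v4i32 with one
          * undef lane (util_next_power_of_two(3) == 4), and a 5-component
          * address is padded out to v8i32, since the image intrinsic overloads
          * presumably exist only for power-of-two vector widths.
          */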
    994 
    995 static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
    996 				unsigned target, LLVMValueRef out)
    997 {
    998 	LLVMBuilderRef builder = ctx->ac.builder;
    999 
   1000 	/* 1D textures are allocated and used as 2D on GFX9. */
    1001 	if (ctx->screen->info.chip_class >= GFX9 &&
   1002 	    (target == TGSI_TEXTURE_1D_ARRAY ||
   1003 	     target == TGSI_TEXTURE_SHADOW1D_ARRAY)) {
   1004 		LLVMValueRef layers =
   1005 			LLVMBuildExtractElement(builder, out,
   1006 						LLVMConstInt(ctx->i32, 2, 0), "");
   1007 		out = LLVMBuildInsertElement(builder, out, layers,
   1008 					     ctx->i32_1, "");
   1009 	}
   1010 
   1011 	/* Divide the number of layers by 6 to get the number of cubes. */
   1012 	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
   1013 	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
   1014 		LLVMValueRef imm2 = LLVMConstInt(ctx->i32, 2, 0);
   1015 
   1016 		LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
   1017 		z = LLVMBuildSDiv(builder, z, LLVMConstInt(ctx->i32, 6, 0), "");
   1018 
   1019 		out = LLVMBuildInsertElement(builder, out, z, imm2, "");
   1020 	}
   1021 	return out;
   1022 }
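
         /* Two concrete fixups: on GFX9 a 1D array's layer count comes back in
          * resinfo channel 2 (the 2D-array layout) and is copied into channel 1
          * where TGSI expects it, and for cube arrays a raw layer count of 12
          * becomes 12 / 6 = 2 cubes.
          */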
   1023 
   1024 static void resq_fetch_args(
   1025 		struct lp_build_tgsi_context * bld_base,
   1026 		struct lp_build_emit_data * emit_data)
   1027 {
   1028 	struct si_shader_context *ctx = si_shader_context(bld_base);
   1029 	const struct tgsi_full_instruction *inst = emit_data->inst;
   1030 	const struct tgsi_full_src_register *reg = &inst->Src[0];
   1031 
   1032 	emit_data->dst_type = ctx->v4i32;
   1033 
   1034 	if (reg->Register.File == TGSI_FILE_BUFFER) {
   1035 		emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg, false);
   1036 		emit_data->arg_count = 1;
   1037 	} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
   1038 		image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture,
   1039 				 &emit_data->args[0]);
   1040 		emit_data->arg_count = 1;
   1041 	} else {
   1042 		LLVMValueRef res_ptr;
   1043 		unsigned image_target;
   1044 
   1045 		if (inst->Memory.Texture == TGSI_TEXTURE_3D)
   1046 			image_target = TGSI_TEXTURE_2D_ARRAY;
   1047 		else
   1048 			image_target = inst->Memory.Texture;
   1049 
   1050 		image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture,
   1051 				 &res_ptr);
   1052 		set_tex_fetch_args(ctx, emit_data, image_target,
   1053 				   res_ptr, NULL, &ctx->i32_0, 1,
   1054 				   0xf);
   1055 	}
   1056 }
   1057 
   1058 static void resq_emit(
   1059 		const struct lp_build_tgsi_action *action,
   1060 		struct lp_build_tgsi_context *bld_base,
   1061 		struct lp_build_emit_data *emit_data)
   1062 {
   1063 	struct si_shader_context *ctx = si_shader_context(bld_base);
   1064 	LLVMBuilderRef builder = ctx->ac.builder;
   1065 	const struct tgsi_full_instruction *inst = emit_data->inst;
   1066 	LLVMValueRef out;
   1067 
   1068 	if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
   1069 		out = LLVMBuildExtractElement(builder, emit_data->args[0],
   1070 					      LLVMConstInt(ctx->i32, 2, 0), "");
   1071 	} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
   1072 		out = get_buffer_size(bld_base, emit_data->args[0]);
   1073 	} else {
   1074 		struct ac_image_args args;
   1075 
   1076 		memcpy(&args, emit_data->args, sizeof(args)); /* ugly */
   1077 		args.opcode = ac_image_get_resinfo;
   1078 		out = ac_build_image_opcode(&ctx->ac, &args);
   1079 
   1080 		out = fix_resinfo(ctx, inst->Memory.Texture, out);
   1081 	}
   1082 
   1083 	emit_data->output[emit_data->chan] = out;
   1084 }
   1085 
   1086 /**
    1087  * Load an image view, fmask view, or sampler state descriptor.
   1088  */
   1089 LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
   1090 				  LLVMValueRef list, LLVMValueRef index,
   1091 				  enum ac_descriptor_type type)
   1092 {
   1093 	LLVMBuilderRef builder = ctx->ac.builder;
   1094 
   1095 	switch (type) {
   1096 	case AC_DESC_IMAGE:
   1097 		/* The image is at [0:7]. */
   1098 		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
   1099 		break;
   1100 	case AC_DESC_BUFFER:
   1101 		/* The buffer is in [4:7]. */
   1102 		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
   1103 		index = LLVMBuildAdd(builder, index, ctx->i32_1, "");
   1104 		list = LLVMBuildPointerCast(builder, list,
   1105 					    si_const_array(ctx->v4i32, 0), "");
   1106 		break;
   1107 	case AC_DESC_FMASK:
   1108 		/* The FMASK is at [8:15]. */
   1109 		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
   1110 		index = LLVMBuildAdd(builder, index, ctx->i32_1, "");
   1111 		break;
   1112 	case AC_DESC_SAMPLER:
   1113 		/* The sampler state is at [12:15]. */
   1114 		index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
   1115 		index = LLVMBuildAdd(builder, index, LLVMConstInt(ctx->i32, 3, 0), "");
   1116 		list = LLVMBuildPointerCast(builder, list,
   1117 					    si_const_array(ctx->v4i32, 0), "");
   1118 		break;
   1119 	}
   1120 
   1121 	return ac_build_load_to_sgpr(&ctx->ac, list, index);
   1122 }
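
         /* Slot-layout example for sampler index n = 2 (each combined slot is
          * 16 dwords):
          *
          *   AC_DESC_IMAGE:   v8i32 index 2n     = 4   ->  dwords [0:7]
          *   AC_DESC_FMASK:   v8i32 index 2n + 1 = 5   ->  dwords [8:15]
          *   AC_DESC_BUFFER:  v4i32 index 4n + 1 = 9   ->  dwords [4:7]
          *   AC_DESC_SAMPLER: v4i32 index 4n + 3 = 11  ->  dwords [12:15]
          */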
   1123 
   1124 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
   1125  *
   1126  * SI-CI:
   1127  *   If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
   1128  *   filtering manually. The driver sets img7 to a mask clearing
   1129  *   MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
   1130  *     s_and_b32 samp0, samp0, img7
   1131  *
   1132  * VI:
   1133  *   The ANISO_OVERRIDE sampler field enables this fix in TA.
   1134  */
   1135 static LLVMValueRef sici_fix_sampler_aniso(struct si_shader_context *ctx,
   1136 					   LLVMValueRef res, LLVMValueRef samp)
   1137 {
   1138 	LLVMValueRef img7, samp0;
   1139 
   1140 	if (ctx->screen->info.chip_class >= VI)
   1141 		return samp;
   1142 
   1143 	img7 = LLVMBuildExtractElement(ctx->ac.builder, res,
   1144 				       LLVMConstInt(ctx->i32, 7, 0), "");
   1145 	samp0 = LLVMBuildExtractElement(ctx->ac.builder, samp,
   1146 					ctx->i32_0, "");
   1147 	samp0 = LLVMBuildAnd(ctx->ac.builder, samp0, img7, "");
   1148 	return LLVMBuildInsertElement(ctx->ac.builder, samp, samp0,
   1149 				      ctx->i32_0, "");
   1150 }
   1151 
   1152 static void tex_fetch_ptrs(
   1153 	struct lp_build_tgsi_context *bld_base,
   1154 	struct lp_build_emit_data *emit_data,
   1155 	LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
   1156 {
   1157 	struct si_shader_context *ctx = si_shader_context(bld_base);
   1158 	LLVMValueRef list = LLVMGetParam(ctx->main_fn, ctx->param_samplers_and_images);
   1159 	const struct tgsi_full_instruction *inst = emit_data->inst;
   1160 	const struct tgsi_full_src_register *reg;
   1161 	unsigned target = inst->Texture.Texture;
   1162 	unsigned sampler_src;
   1163 	LLVMValueRef index;
   1164 
   1165 	sampler_src = emit_data->inst->Instruction.NumSrcRegs - 1;
   1166 	reg = &emit_data->inst->Src[sampler_src];
   1167 
   1168 	if (reg->Register.Indirect) {
   1169 		index = si_get_bounded_indirect_index(ctx,
   1170 						      &reg->Indirect,
   1171 						      reg->Register.Index,
   1172 						      ctx->num_samplers);
   1173 		index = LLVMBuildAdd(ctx->ac.builder, index,
   1174 				     LLVMConstInt(ctx->i32, SI_NUM_IMAGES / 2, 0), "");
   1175 	} else {
   1176 		index = LLVMConstInt(ctx->i32,
   1177 				     si_get_sampler_slot(reg->Register.Index), 0);
   1178 	}
   1179 
   1180 	if (reg->Register.File != TGSI_FILE_SAMPLER) {
   1181 		/* Bindless descriptors are accessible from a different pair of
   1182 		 * user SGPR indices.
   1183 		 */
   1184 		list = LLVMGetParam(ctx->main_fn,
   1185 				    ctx->param_bindless_samplers_and_images);
   1186 		index = lp_build_emit_fetch_src(bld_base, reg,
   1187 						TGSI_TYPE_UNSIGNED, 0);
   1188 	}
   1189 
   1190 	if (target == TGSI_TEXTURE_BUFFER)
   1191 		*res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_BUFFER);
   1192 	else
   1193 		*res_ptr = si_load_sampler_desc(ctx, list, index, AC_DESC_IMAGE);
   1194 
   1195 	if (samp_ptr)
   1196 		*samp_ptr = NULL;
   1197 	if (fmask_ptr)
   1198 		*fmask_ptr = NULL;
   1199 
   1200 	if (target == TGSI_TEXTURE_2D_MSAA ||
   1201 	    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
   1202 		if (fmask_ptr)
   1203 			*fmask_ptr = si_load_sampler_desc(ctx, list, index,
   1204 						          AC_DESC_FMASK);
   1205 	} else if (target != TGSI_TEXTURE_BUFFER) {
   1206 		if (samp_ptr) {
   1207 			*samp_ptr = si_load_sampler_desc(ctx, list, index,
   1208 						         AC_DESC_SAMPLER);
   1209 			*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
   1210 		}
   1211 	}
   1212 }
   1213 
   1214 static void txq_fetch_args(
   1215 	struct lp_build_tgsi_context *bld_base,
   1216 	struct lp_build_emit_data *emit_data)
   1217 {
   1218 	struct si_shader_context *ctx = si_shader_context(bld_base);
   1219 	const struct tgsi_full_instruction *inst = emit_data->inst;
   1220 	unsigned target = inst->Texture.Texture;
   1221 	LLVMValueRef res_ptr;
   1222 	LLVMValueRef address;
   1223 
   1224 	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL);
   1225 
   1226 	if (target == TGSI_TEXTURE_BUFFER) {
   1227 		/* Read the size from the buffer descriptor directly. */
   1228 		emit_data->args[0] = get_buffer_size(bld_base, res_ptr);
   1229 		return;
   1230 	}
   1231 
   1232 	/* Textures - set the mip level. */
   1233 	address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
   1234 
   1235 	set_tex_fetch_args(ctx, emit_data, target, res_ptr,
   1236 			   NULL, &address, 1, 0xf);
   1237 }
   1238 
   1239 static void txq_emit(const struct lp_build_tgsi_action *action,
   1240 		     struct lp_build_tgsi_context *bld_base,
   1241 		     struct lp_build_emit_data *emit_data)
   1242 {
   1243 	struct si_shader_context *ctx = si_shader_context(bld_base);
   1244 	struct ac_image_args args;
   1245 	unsigned target = emit_data->inst->Texture.Texture;
   1246 
   1247 	if (target == TGSI_TEXTURE_BUFFER) {
   1248 		/* Just return the buffer size. */
   1249 		emit_data->output[emit_data->chan] = emit_data->args[0];
   1250 		return;
   1251 	}
   1252 
   1253 	memcpy(&args, emit_data->args, sizeof(args)); /* ugly */
   1254 
   1255 	args.opcode = ac_image_get_resinfo;
   1256 	LLVMValueRef result = ac_build_image_opcode(&ctx->ac, &args);
   1257 
   1258 	emit_data->output[emit_data->chan] = fix_resinfo(ctx, target, result);
   1259 }
   1260 
   1261 static void tex_fetch_args(
   1262 	struct lp_build_tgsi_context *bld_base,
   1263 	struct lp_build_emit_data *emit_data)
   1264 {
   1265 	struct si_shader_context *ctx = si_shader_context(bld_base);
   1266 	const struct tgsi_full_instruction *inst = emit_data->inst;
   1267 	unsigned opcode = inst->Instruction.Opcode;
   1268 	unsigned target = inst->Texture.Texture;
   1269 	LLVMValueRef coords[5], derivs[6];
   1270 	LLVMValueRef address[16];
   1271 	unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
   1272 	int ref_pos = tgsi_util_get_shadow_ref_src_index(target);
   1273 	unsigned count = 0;
   1274 	unsigned chan;
   1275 	unsigned num_deriv_channels = 0;
   1276 	bool has_offset = inst->Texture.NumOffsets > 0;
   1277 	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
   1278 	unsigned dmask = 0xf;
   1279 
   1280 	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
   1281 
   1282 	if (target == TGSI_TEXTURE_BUFFER) {
   1283 		emit_data->dst_type = ctx->v4f32;
   1284 		emit_data->args[0] = res_ptr;
   1285 		emit_data->args[1] = ctx->i32_0;
   1286 		emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
   1287 		emit_data->arg_count = 3;
   1288 		return;
   1289 	}
   1290 
   1291 	/* Fetch and project texture coordinates */
   1292 	coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
   1293 	for (chan = 0; chan < 3; chan++) {
   1294 		coords[chan] = lp_build_emit_fetch(bld_base,
   1295 						   emit_data->inst, 0,
   1296 						   chan);
   1297 		if (opcode == TGSI_OPCODE_TXP)
   1298 			coords[chan] = lp_build_emit_llvm_binary(bld_base,
   1299 								 TGSI_OPCODE_DIV,
   1300 								 coords[chan],
   1301 								 coords[3]);
   1302 	}
   1303 
   1304 	if (opcode == TGSI_OPCODE_TXP)
   1305 		coords[3] = ctx->ac.f32_1;
   1306 
   1307 	/* Pack offsets. */
   1308 	if (has_offset &&
   1309 	    opcode != TGSI_OPCODE_TXF &&
   1310 	    opcode != TGSI_OPCODE_TXF_LZ) {
   1311 		/* The offsets are six-bit signed integers packed like this:
   1312 		 *   X=[5:0], Y=[13:8], and Z=[21:16].
   1313 		 */
   1314 		LLVMValueRef offset[3], pack;
   1315 
   1316 		assert(inst->Texture.NumOffsets == 1);
   1317 
   1318 		for (chan = 0; chan < 3; chan++) {
   1319 			offset[chan] = lp_build_emit_fetch_texoffset(bld_base,
   1320 								     emit_data->inst, 0, chan);
   1321 			offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
   1322 						    LLVMConstInt(ctx->i32, 0x3f, 0), "");
   1323 			if (chan)
   1324 				offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
   1325 							    LLVMConstInt(ctx->i32, chan*8, 0), "");
   1326 		}
   1327 
   1328 		pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
   1329 		pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
   1330 		address[count++] = pack;
   1331 	}
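
         	/* Packed-offset example: offsets (1, -2, 3) mask to six bits each
         	 * ((-2) & 0x3f = 0x3e) and pack as 1 | 0x3e << 8 | 3 << 16
         	 * = 0x00033e01, matching the X=[5:0], Y=[13:8], Z=[21:16] layout
         	 * described above.
         	 */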
   1332 
   1333 	/* Pack LOD bias value */
   1334 	if (opcode == TGSI_OPCODE_TXB)
   1335 		address[count++] = coords[3];
   1336 	if (opcode == TGSI_OPCODE_TXB2)
   1337 		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
   1338 
   1339 	/* Pack depth comparison value */
   1340 	if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
   1341 		LLVMValueRef z;
   1342 
   1343 		if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
   1344 			z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
   1345 		} else {
   1346 			assert(ref_pos >= 0);
   1347 			z = coords[ref_pos];
   1348 		}
   1349 
   1350 		/* Section 8.23.1 (Depth Texture Comparison Mode) of the
   1351 		 * OpenGL 4.5 spec says:
   1352 		 *
    1353 		 *    "If the texture's internal format indicates a fixed-point
   1354 		 *     depth texture, then D_t and D_ref are clamped to the
   1355 		 *     range [0, 1]; otherwise no clamping is performed."
   1356 		 *
   1357 		 * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
   1358 		 * so the depth comparison value isn't clamped for Z16 and
   1359 		 * Z24 anymore. Do it manually here.
   1360 		 */
   1361 		if (ctx->screen->info.chip_class >= VI) {
   1362 			LLVMValueRef upgraded;
   1363 			LLVMValueRef clamped;
   1364 			upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr,
   1365 							   LLVMConstInt(ctx->i32, 3, false), "");
   1366 			upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
   1367 						 LLVMConstInt(ctx->i32, 29, false), "");
   1368 			upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, "");
   1369 			clamped = ac_build_clamp(&ctx->ac, z);
   1370 			z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, "");
   1371 		}
   1372 
   1373 		address[count++] = z;
   1374 	}
   1375 
   1376 	/* Pack user derivatives */
   1377 	if (opcode == TGSI_OPCODE_TXD) {
   1378 		int param, num_src_deriv_channels, num_dst_deriv_channels;
   1379 
   1380 		switch (target) {
   1381 		case TGSI_TEXTURE_3D:
   1382 			num_src_deriv_channels = 3;
   1383 			num_dst_deriv_channels = 3;
   1384 			num_deriv_channels = 3;
   1385 			break;
   1386 		case TGSI_TEXTURE_2D:
   1387 		case TGSI_TEXTURE_SHADOW2D:
   1388 		case TGSI_TEXTURE_RECT:
   1389 		case TGSI_TEXTURE_SHADOWRECT:
   1390 		case TGSI_TEXTURE_2D_ARRAY:
   1391 		case TGSI_TEXTURE_SHADOW2D_ARRAY:
   1392 			num_src_deriv_channels = 2;
   1393 			num_dst_deriv_channels = 2;
   1394 			num_deriv_channels = 2;
   1395 			break;
   1396 		case TGSI_TEXTURE_CUBE:
   1397 		case TGSI_TEXTURE_SHADOWCUBE:
   1398 		case TGSI_TEXTURE_CUBE_ARRAY:
   1399 		case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
   1400 			/* Cube derivatives will be converted to 2D. */
   1401 			num_src_deriv_channels = 3;
   1402 			num_dst_deriv_channels = 3;
   1403 			num_deriv_channels = 2;
   1404 			break;
   1405 		case TGSI_TEXTURE_1D:
   1406 		case TGSI_TEXTURE_SHADOW1D:
   1407 		case TGSI_TEXTURE_1D_ARRAY:
   1408 		case TGSI_TEXTURE_SHADOW1D_ARRAY:
   1409 			num_src_deriv_channels = 1;
   1410 
   1411 			/* 1D textures are allocated and used as 2D on GFX9. */
   1412 			if (ctx->screen->info.chip_class >= GFX9) {
   1413 				num_dst_deriv_channels = 2;
   1414 				num_deriv_channels = 2;
   1415 			} else {
   1416 				num_dst_deriv_channels = 1;
   1417 				num_deriv_channels = 1;
   1418 			}
   1419 			break;
   1420 		default:
   1421 			unreachable("invalid target");
   1422 		}
   1423 
   1424 		for (param = 0; param < 2; param++) {
   1425 			for (chan = 0; chan < num_src_deriv_channels; chan++)
   1426 				derivs[param * num_dst_deriv_channels + chan] =
   1427 					lp_build_emit_fetch(bld_base, inst, param+1, chan);
   1428 
   1429 			/* Fill in the rest with zeros. */
   1430 			for (chan = num_src_deriv_channels;
   1431 			     chan < num_dst_deriv_channels; chan++)
   1432 				derivs[param * num_dst_deriv_channels + chan] =
   1433 					ctx->ac.f32_0;
   1434 		}
   1435 	}
   1436 
   1437 	if (target == TGSI_TEXTURE_CUBE ||
   1438 	    target == TGSI_TEXTURE_CUBE_ARRAY ||
   1439 	    target == TGSI_TEXTURE_SHADOWCUBE ||
   1440 	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
   1441 		ac_prepare_cube_coords(&ctx->ac,
   1442 				       opcode == TGSI_OPCODE_TXD,
   1443 				       target == TGSI_TEXTURE_CUBE_ARRAY ||
   1444 				       target == TGSI_TEXTURE_SHADOWCUBE_ARRAY,
   1445 				       opcode == TGSI_OPCODE_LODQ,
   1446 				       coords, derivs);
   1447 	} else if (tgsi_is_array_sampler(target) &&
   1448 		   opcode != TGSI_OPCODE_TXF &&
   1449 		   opcode != TGSI_OPCODE_TXF_LZ &&
   1450 		   ctx->screen->info.chip_class <= VI) {
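         		/* The array layer arrives as a float, so round it to the
         		 * nearest integer here; TXF/TXF_LZ already use integer
         		 * coordinates, and GFX9 doesn't need this.
         		 */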
   1451 		unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
   1452 		coords[array_coord] =
   1453 			ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32,
   1454 					   &coords[array_coord], 1, 0);
   1455 	}
   1456 
   1457 	if (opcode == TGSI_OPCODE_TXD)
   1458 		for (int i = 0; i < num_deriv_channels * 2; i++)
   1459 			address[count++] = derivs[i];
   1460 
   1461 	/* Pack texture coordinates */
   1462 	address[count++] = coords[0];
   1463 	if (num_coords > 1)
   1464 		address[count++] = coords[1];
   1465 	if (num_coords > 2)
   1466 		address[count++] = coords[2];
   1467 
   1468 	/* 1D textures are allocated and used as 2D on GFX9. */
   1469 	if (ctx->screen->info.chip_class >= GFX9) {
   1470 		LLVMValueRef filler;
   1471 
   1472 		/* Use 0.5, so that we don't sample the border color. */
   1473 		if (opcode == TGSI_OPCODE_TXF ||
   1474 		    opcode == TGSI_OPCODE_TXF_LZ)
   1475 			filler = ctx->i32_0;
   1476 		else
   1477 			filler = LLVMConstReal(ctx->f32, 0.5);
   1478 
   1479 		if (target == TGSI_TEXTURE_1D ||
   1480 		    target == TGSI_TEXTURE_SHADOW1D) {
   1481 			address[count++] = filler;
   1482 		} else if (target == TGSI_TEXTURE_1D_ARRAY ||
   1483 			   target == TGSI_TEXTURE_SHADOW1D_ARRAY) {
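         			/* Insert the filler before the layer:
         			 * (x, layer) becomes (x, filler, layer).
         			 */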
   1484 			address[count] = address[count - 1];
   1485 			address[count - 1] = filler;
   1486 			count++;
   1487 		}
   1488 	}
   1489 
   1490 	/* Pack LOD or sample index */
   1491 	if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
   1492 		address[count++] = coords[3];
   1493 	else if (opcode == TGSI_OPCODE_TXL2)
   1494 		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
   1495 
   1496 	if (count > 16) {
   1497 		assert(!"Cannot handle more than 16 texture address parameters");
   1498 		count = 16;
   1499 	}
   1500 
   1501 	for (chan = 0; chan < count; chan++)
   1502 		address[chan] = ac_to_integer(&ctx->ac, address[chan]);
   1503 
   1504 	/* Adjust the sample index according to FMASK.
   1505 	 *
   1506 	 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
   1507 	 * which is the identity mapping. Each nibble says which physical sample
   1508 	 * should be fetched to get that sample.
   1509 	 *
   1510 	 * For example, 0x11111100 means there are only 2 samples stored and
   1511 	 * the second sample covers 3/4 of the pixel. When reading samples 0
   1512 	 * and 1, return physical sample 0 (determined by the first two 0s
   1513 	 * in FMASK), otherwise return physical sample 1.
   1514 	 *
   1515 	 * The sample index should be adjusted as follows:
   1516 	 *   sample_index = (fmask >> (sample_index * 4)) & 0xF;
   1517 	 */
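         	/* For illustration only (hypothetical helper, not part of the
         	 * driver), the equivalent CPU-side decode would be:
         	 *
         	 *    unsigned physical_sample(uint32_t fmask, unsigned i)
         	 *    {
         	 *            return (fmask >> (i * 4)) & 0xF;
         	 *    }
         	 *
         	 * The IR built below implements this shift-and-mask, guarded by
         	 * a validity check on the FMASK descriptor.
         	 */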
   1518 	if (target == TGSI_TEXTURE_2D_MSAA ||
   1519 	    target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
   1520 		struct lp_build_emit_data txf_emit_data = *emit_data;
   1521 		LLVMValueRef txf_address[4];
   1522 		/* We only need .xy for non-arrays, and .xyz for arrays. */
   1523 		unsigned txf_count = target == TGSI_TEXTURE_2D_MSAA ? 2 : 3;
   1524 		struct tgsi_full_instruction inst = {};
   1525 
   1526 		memcpy(txf_address, address, sizeof(txf_address));
   1527 
   1528 		/* Read FMASK using TXF_LZ. */
   1529 		inst.Instruction.Opcode = TGSI_OPCODE_TXF_LZ;
   1530 		inst.Texture.Texture = target;
   1531 		txf_emit_data.inst = &inst;
   1532 		txf_emit_data.chan = 0;
   1533 		set_tex_fetch_args(ctx, &txf_emit_data,
   1534 				   target, fmask_ptr, NULL,
   1535 				   txf_address, txf_count, 0xf);
   1536 		build_tex_intrinsic(&tex_action, bld_base, &txf_emit_data);
   1537 
   1538 		/* Initialize some constants. */
   1539 		LLVMValueRef four = LLVMConstInt(ctx->i32, 4, 0);
   1540 		LLVMValueRef F = LLVMConstInt(ctx->i32, 0xF, 0);
   1541 
   1542 		/* Apply the formula. */
   1543 		LLVMValueRef fmask =
   1544 			LLVMBuildExtractElement(ctx->ac.builder,
   1545 						txf_emit_data.output[0],
   1546 						ctx->i32_0, "");
   1547 
   1548 		unsigned sample_chan = txf_count; /* the sample index is last */
   1549 
   1550 		LLVMValueRef sample_index4 =
   1551 			LLVMBuildMul(ctx->ac.builder, address[sample_chan], four, "");
   1552 
   1553 		LLVMValueRef shifted_fmask =
   1554 			LLVMBuildLShr(ctx->ac.builder, fmask, sample_index4, "");
   1555 
   1556 		LLVMValueRef final_sample =
   1557 			LLVMBuildAnd(ctx->ac.builder, shifted_fmask, F, "");
   1558 
    1559 		/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
    1560 		 * resource descriptor is 0 (invalid).
    1561 		 */
   1562 		LLVMValueRef fmask_desc =
   1563 			LLVMBuildBitCast(ctx->ac.builder, fmask_ptr,
   1564 					 ctx->v8i32, "");
   1565 
   1566 		LLVMValueRef fmask_word1 =
   1567 			LLVMBuildExtractElement(ctx->ac.builder, fmask_desc,
   1568 						ctx->i32_1, "");
   1569 
   1570 		LLVMValueRef word1_is_nonzero =
   1571 			LLVMBuildICmp(ctx->ac.builder, LLVMIntNE,
   1572 				      fmask_word1, ctx->i32_0, "");
   1573 
   1574 		/* Replace the MSAA sample index. */
   1575 		address[sample_chan] =
   1576 			LLVMBuildSelect(ctx->ac.builder, word1_is_nonzero,
   1577 					final_sample, address[sample_chan], "");
   1578 	}
   1579 
   1580 	if (opcode == TGSI_OPCODE_TXF ||
   1581 	    opcode == TGSI_OPCODE_TXF_LZ) {
   1582 		/* add tex offsets */
   1583 		if (inst->Texture.NumOffsets) {
   1584 			struct lp_build_context *uint_bld = &bld_base->uint_bld;
   1585 			const struct tgsi_texture_offset *off = inst->TexOffsets;
   1586 
   1587 			assert(inst->Texture.NumOffsets == 1);
   1588 
   1589 			switch (target) {
   1590 			case TGSI_TEXTURE_3D:
   1591 				address[2] = lp_build_add(uint_bld, address[2],
   1592 						ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]);
   1593 				/* fall through */
   1594 			case TGSI_TEXTURE_2D:
   1595 			case TGSI_TEXTURE_SHADOW2D:
   1596 			case TGSI_TEXTURE_RECT:
   1597 			case TGSI_TEXTURE_SHADOWRECT:
   1598 			case TGSI_TEXTURE_2D_ARRAY:
   1599 			case TGSI_TEXTURE_SHADOW2D_ARRAY:
   1600 				address[1] =
   1601 					lp_build_add(uint_bld, address[1],
   1602 						ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]);
   1603 				/* fall through */
   1604 			case TGSI_TEXTURE_1D:
   1605 			case TGSI_TEXTURE_SHADOW1D:
   1606 			case TGSI_TEXTURE_1D_ARRAY:
   1607 			case TGSI_TEXTURE_SHADOW1D_ARRAY:
   1608 				address[0] =
   1609 					lp_build_add(uint_bld, address[0],
   1610 						ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]);
   1611 				break;
   1612 				/* texture offsets do not apply to other texture targets */
   1613 			}
   1614 		}
   1615 	}
   1616 
   1617 	if (opcode == TGSI_OPCODE_TG4) {
   1618 		unsigned gather_comp = 0;
   1619 
   1620 		/* DMASK was repurposed for GATHER4. 4 components are always
   1621 		 * returned and DMASK works like a swizzle - it selects
   1622 		 * the component to fetch. The only valid DMASK values are
   1623 		 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
   1624 		 * (red,red,red,red) etc.) The ISA document doesn't mention
   1625 		 * this.
   1626 		 */
   1627 
   1628 		/* Get the component index from src1.x for Gather4. */
   1629 		if (!tgsi_is_shadow_target(target)) {
   1630 			LLVMValueRef comp_imm;
   1631 			struct tgsi_src_register src1 = inst->Src[1].Register;
   1632 
   1633 			assert(src1.File == TGSI_FILE_IMMEDIATE);
   1634 
   1635 			comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX];
   1636 			gather_comp = LLVMConstIntGetZExtValue(comp_imm);
   1637 			gather_comp = CLAMP(gather_comp, 0, 3);
   1638 		}
   1639 
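         		/* e.g. gathering the green component (gather_comp == 1)
         		 * yields dmask = 0x2.
         		 */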
   1640 		dmask = 1 << gather_comp;
   1641 	}
   1642 
   1643 	set_tex_fetch_args(ctx, emit_data, target, res_ptr,
   1644 			   samp_ptr, address, count, dmask);
   1645 }
   1646 
   1647 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
   1648  * incorrectly forces nearest filtering if the texture format is integer.
   1649  * The only effect it has on Gather4, which always returns 4 texels for
   1650  * bilinear filtering, is that the final coordinates are off by 0.5 of
   1651  * the texel size.
   1652  *
   1653  * The workaround is to subtract 0.5 from the unnormalized coordinates,
   1654  * or (0.5 / size) from the normalized coordinates.
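          * (e.g. for a 256-texel dimension, -0.5 / 256 = -0.001953125 is added
          * to the normalized coordinate).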
   1655  *
    1656  * However, cube textures with 8_8_8_8 data formats require a different
    1657  * workaround: overriding the number format to USCALED/SSCALED. Since that
    1658  * would lose precision with 32-bit data formats, it has to be applied
    1659  * dynamically at runtime. In that case, return an i1 value indicating
    1660  * whether the descriptor was overridden (and hence the result needs a fixup).
   1661  */
   1662 static LLVMValueRef
   1663 si_lower_gather4_integer(struct si_shader_context *ctx,
   1664 			 struct ac_image_args *args,
   1665 			 unsigned target,
   1666 			 enum tgsi_return_type return_type)
   1667 {
   1668 	LLVMBuilderRef builder = ctx->ac.builder;
   1669 	LLVMValueRef wa_8888 = NULL;
   1670 	LLVMValueRef coord = args->addr;
   1671 	LLVMValueRef half_texel[2];
   1672 	/* Texture coordinates start after:
   1673 	 *   {offset, bias, z-compare, derivatives}
   1674 	 * Only the offset and z-compare can occur here.
   1675 	 */
   1676 	unsigned coord_vgpr_index = (int)args->offset + (int)args->compare;
   1677 	int c;
   1678 
   1679 	assert(return_type == TGSI_RETURN_TYPE_SINT ||
   1680 	       return_type == TGSI_RETURN_TYPE_UINT);
   1681 
   1682 	if (target == TGSI_TEXTURE_CUBE ||
   1683 	    target == TGSI_TEXTURE_CUBE_ARRAY) {
   1684 		LLVMValueRef formats;
   1685 		LLVMValueRef data_format;
   1686 		LLVMValueRef wa_formats;
   1687 
   1688 		formats = LLVMBuildExtractElement(builder, args->resource, ctx->i32_1, "");
   1689 
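         		/* DATA_FORMAT is bits [25:20] of dword 1 of the resource
         		 * descriptor.
         		 */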
   1690 		data_format = LLVMBuildLShr(builder, formats,
   1691 					    LLVMConstInt(ctx->i32, 20, false), "");
   1692 		data_format = LLVMBuildAnd(builder, data_format,
   1693 					   LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
   1694 		wa_8888 = LLVMBuildICmp(
   1695 			builder, LLVMIntEQ, data_format,
   1696 			LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
   1697 			"");
   1698 
   1699 		uint32_t wa_num_format =
   1700 			return_type == TGSI_RETURN_TYPE_UINT ?
   1701 			S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_USCALED) :
   1702 			S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_SSCALED);
   1703 		wa_formats = LLVMBuildAnd(builder, formats,
   1704 					  LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false),
   1705 					  "");
   1706 		wa_formats = LLVMBuildOr(builder, wa_formats,
   1707 					LLVMConstInt(ctx->i32, wa_num_format, false), "");
   1708 
   1709 		formats = LLVMBuildSelect(builder, wa_8888, wa_formats, formats, "");
   1710 		args->resource = LLVMBuildInsertElement(
   1711 			builder, args->resource, formats, ctx->i32_1, "");
   1712 	}
   1713 
   1714 	if (target == TGSI_TEXTURE_RECT ||
   1715 	    target == TGSI_TEXTURE_SHADOWRECT) {
   1716 		assert(!wa_8888);
   1717 		half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
   1718 	} else {
   1719 		struct tgsi_full_instruction txq_inst = {};
   1720 		struct lp_build_emit_data txq_emit_data = {};
   1721 		struct lp_build_if_state if_ctx;
   1722 
   1723 		if (wa_8888) {
   1724 			/* Skip the texture size query entirely if we don't need it. */
   1725 			lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, ""));
   1726 		}
   1727 
   1728 		/* Query the texture size. */
   1729 		txq_inst.Texture.Texture = target;
   1730 		txq_emit_data.inst = &txq_inst;
   1731 		txq_emit_data.dst_type = ctx->v4i32;
   1732 		set_tex_fetch_args(ctx, &txq_emit_data, target,
   1733 				   args->resource, NULL, &ctx->i32_0,
   1734 				   1, 0xf);
   1735 		txq_emit(NULL, &ctx->bld_base, &txq_emit_data);
   1736 
   1737 		/* Compute -0.5 / size. */
   1738 		for (c = 0; c < 2; c++) {
   1739 			half_texel[c] =
   1740 				LLVMBuildExtractElement(builder, txq_emit_data.output[0],
   1741 							LLVMConstInt(ctx->i32, c, 0), "");
   1742 			half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, "");
   1743 			half_texel[c] =
   1744 				lp_build_emit_llvm_unary(&ctx->bld_base,
   1745 							 TGSI_OPCODE_RCP, half_texel[c]);
   1746 			half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
   1747 						      LLVMConstReal(ctx->f32, -0.5), "");
   1748 		}
   1749 
   1750 		if (wa_8888) {
   1751 			lp_build_endif(&if_ctx);
   1752 
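         			/* If the workaround fired, the size query was skipped, so
         			 * merge in 0.0 from the entry block; the coordinate fixup
         			 * below is then a no-op for that path.
         			 */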
   1753 			LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block };
   1754 
   1755 			for (c = 0; c < 2; c++) {
   1756 				LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 };
   1757 				half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2,
   1758 							     values, bb);
   1759 			}
   1760 		}
   1761 	}
   1762 
   1763 	for (c = 0; c < 2; c++) {
   1764 		LLVMValueRef tmp;
   1765 		LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
   1766 
   1767 		tmp = LLVMBuildExtractElement(builder, coord, index, "");
   1768 		tmp = ac_to_float(&ctx->ac, tmp);
   1769 		tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
   1770 		tmp = ac_to_integer(&ctx->ac, tmp);
   1771 		coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
   1772 	}
   1773 
   1774 	args->addr = coord;
   1775 
   1776 	return wa_8888;
   1777 }
   1778 
   1779 /* The second half of the cube texture 8_8_8_8 integer workaround: adjust the
   1780  * result after the gather operation.
   1781  */
   1782 static LLVMValueRef
   1783 si_fix_gather4_integer_result(struct si_shader_context *ctx,
   1784 			   LLVMValueRef result,
   1785 			   enum tgsi_return_type return_type,
   1786 			   LLVMValueRef wa)
   1787 {
   1788 	LLVMBuilderRef builder = ctx->ac.builder;
   1789 
   1790 	assert(return_type == TGSI_RETURN_TYPE_SINT ||
   1791 	       return_type == TGSI_RETURN_TYPE_UINT);
   1792 
   1793 	for (unsigned chan = 0; chan < 4; ++chan) {
   1794 		LLVMValueRef chanv = LLVMConstInt(ctx->i32, chan, false);
   1795 		LLVMValueRef value;
   1796 		LLVMValueRef wa_value;
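         		/* With the num format overridden to USCALED/SSCALED, the
         		 * hardware returns the integer texel value as an ordinary
         		 * float, so convert it back to an integer and bitcast for
         		 * the select below.
         		 */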
   1797 
   1798 		value = LLVMBuildExtractElement(builder, result, chanv, "");
   1799 
   1800 		if (return_type == TGSI_RETURN_TYPE_UINT)
   1801 			wa_value = LLVMBuildFPToUI(builder, value, ctx->i32, "");
   1802 		else
   1803 			wa_value = LLVMBuildFPToSI(builder, value, ctx->i32, "");
   1804 		wa_value = ac_to_float(&ctx->ac, wa_value);
   1805 		value = LLVMBuildSelect(builder, wa, wa_value, value, "");
   1806 
   1807 		result = LLVMBuildInsertElement(builder, result, value, chanv, "");
   1808 	}
   1809 
   1810 	return result;
   1811 }
   1812 
   1813 static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
   1814 				struct lp_build_tgsi_context *bld_base,
   1815 				struct lp_build_emit_data *emit_data)
   1816 {
   1817 	struct si_shader_context *ctx = si_shader_context(bld_base);
   1818 	const struct tgsi_full_instruction *inst = emit_data->inst;
   1819 	struct ac_image_args args;
   1820 	unsigned opcode = inst->Instruction.Opcode;
   1821 	unsigned target = inst->Texture.Texture;
   1822 
   1823 	if (target == TGSI_TEXTURE_BUFFER) {
   1824 		emit_data->output[emit_data->chan] =
   1825 			ac_build_buffer_load_format(&ctx->ac,
   1826 						    emit_data->args[0],
   1827 						    emit_data->args[2],
   1828 						    emit_data->args[1],
   1829 						    true);
   1830 		return;
   1831 	}
   1832 
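         	/* emit_data->args is assumed to hold a struct ac_image_args laid
         	 * down by set_tex_fetch_args, hence the "ugly" raw copy.
         	 */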
   1833 	memcpy(&args, emit_data->args, sizeof(args)); /* ugly */
   1834 
   1835 	args.opcode = ac_image_sample;
   1836 	args.compare = tgsi_is_shadow_target(target);
   1837 	args.offset = inst->Texture.NumOffsets > 0;
   1838 
   1839 	switch (opcode) {
   1840 	case TGSI_OPCODE_TXF:
   1841 	case TGSI_OPCODE_TXF_LZ:
   1842 		args.opcode = opcode == TGSI_OPCODE_TXF_LZ ||
   1843 			      target == TGSI_TEXTURE_2D_MSAA ||
   1844 			      target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
   1845 				      ac_image_load : ac_image_load_mip;
   1846 		args.compare = false;
   1847 		args.offset = false;
   1848 		break;
   1849 	case TGSI_OPCODE_LODQ:
   1850 		args.opcode = ac_image_get_lod;
   1851 		args.compare = false;
   1852 		args.offset = false;
   1853 		break;
   1854 	case TGSI_OPCODE_TEX:
   1855 	case TGSI_OPCODE_TEX2:
   1856 	case TGSI_OPCODE_TXP:
   1857 		if (ctx->type != PIPE_SHADER_FRAGMENT)
   1858 			args.level_zero = true;
   1859 		break;
   1860 	case TGSI_OPCODE_TEX_LZ:
   1861 		args.level_zero = true;
   1862 		break;
   1863 	case TGSI_OPCODE_TXB:
   1864 	case TGSI_OPCODE_TXB2:
   1865 		assert(ctx->type == PIPE_SHADER_FRAGMENT);
   1866 		args.bias = true;
   1867 		break;
   1868 	case TGSI_OPCODE_TXL:
   1869 	case TGSI_OPCODE_TXL2:
   1870 		args.lod = true;
   1871 		break;
   1872 	case TGSI_OPCODE_TXD:
   1873 		args.deriv = true;
   1874 		break;
   1875 	case TGSI_OPCODE_TG4:
   1876 		args.opcode = ac_image_gather4;
   1877 		args.level_zero = true;
   1878 		break;
   1879 	default:
   1880 		assert(0);
   1881 		return;
   1882 	}
   1883 
   1884 	/* The hardware needs special lowering for Gather4 with integer formats. */
   1885 	LLVMValueRef gather4_int_result_workaround = NULL;
   1886 
   1887 	if (ctx->screen->info.chip_class <= VI &&
   1888 	    opcode == TGSI_OPCODE_TG4) {
   1889 		assert(inst->Texture.ReturnType != TGSI_RETURN_TYPE_UNKNOWN);
   1890 
   1891 		if (inst->Texture.ReturnType == TGSI_RETURN_TYPE_SINT ||
   1892 		    inst->Texture.ReturnType == TGSI_RETURN_TYPE_UINT) {
   1893 			gather4_int_result_workaround =
   1894 				si_lower_gather4_integer(ctx, &args, target,
   1895 							 inst->Texture.ReturnType);
   1896 		}
   1897 	}
   1898 
   1899 	LLVMValueRef result =
   1900 		ac_build_image_opcode(&ctx->ac, &args);
   1901 
   1902 	if (gather4_int_result_workaround) {
   1903 		result = si_fix_gather4_integer_result(ctx, result,
   1904 						       inst->Texture.ReturnType,
   1905 						       gather4_int_result_workaround);
   1906 	}
   1907 
   1908 	emit_data->output[emit_data->chan] = result;
   1909 }
   1910 
   1911 static void si_llvm_emit_txqs(
   1912 	const struct lp_build_tgsi_action *action,
   1913 	struct lp_build_tgsi_context *bld_base,
   1914 	struct lp_build_emit_data *emit_data)
   1915 {
   1916 	struct si_shader_context *ctx = si_shader_context(bld_base);
   1917 	LLVMValueRef res, samples;
   1918 	LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
   1919 
   1920 	tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
   1921 
   1922 
   1923 	/* Read the samples from the descriptor directly. */
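         	/* For MSAA resources, the LAST_LEVEL field (dword 3, bits [19:16])
         	 * holds log2(sample count), so 1 << field gives the count.
         	 */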
   1924 	res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, "");
   1925 	samples = LLVMBuildExtractElement(ctx->ac.builder, res,
   1926 					  LLVMConstInt(ctx->i32, 3, 0), "");
   1927 	samples = LLVMBuildLShr(ctx->ac.builder, samples,
   1928 				LLVMConstInt(ctx->i32, 16, 0), "");
   1929 	samples = LLVMBuildAnd(ctx->ac.builder, samples,
   1930 			       LLVMConstInt(ctx->i32, 0xf, 0), "");
   1931 	samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1,
   1932 			       samples, "");
   1933 
   1934 	emit_data->output[emit_data->chan] = samples;
   1935 }
   1936 
   1937 static const struct lp_build_tgsi_action tex_action = {
   1938 	.fetch_args = tex_fetch_args,
   1939 	.emit = build_tex_intrinsic,
   1940 };
   1941 
   1942 /**
    1943  * Set up actions for TGSI memory opcodes, including texture opcodes.
   1944  */
   1945 void si_shader_context_init_mem(struct si_shader_context *ctx)
   1946 {
   1947 	struct lp_build_tgsi_context *bld_base;
   1948 	struct lp_build_tgsi_action tmpl = {};
   1949 
   1950 	bld_base = &ctx->bld_base;
   1951 
   1952 	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
   1953 	bld_base->op_actions[TGSI_OPCODE_TEX_LZ] = tex_action;
   1954 	bld_base->op_actions[TGSI_OPCODE_TEX2] = tex_action;
   1955 	bld_base->op_actions[TGSI_OPCODE_TXB] = tex_action;
   1956 	bld_base->op_actions[TGSI_OPCODE_TXB2] = tex_action;
   1957 	bld_base->op_actions[TGSI_OPCODE_TXD] = tex_action;
   1958 	bld_base->op_actions[TGSI_OPCODE_TXF] = tex_action;
   1959 	bld_base->op_actions[TGSI_OPCODE_TXF_LZ] = tex_action;
   1960 	bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
   1961 	bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
   1962 	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
   1963 	bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = txq_fetch_args;
   1964 	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   1965 	bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
   1966 	bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
   1967 	bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
   1968 
   1969 	bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args;
   1970 	bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
   1971 	bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args;
   1972 	bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
   1973 	bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args;
   1974 	bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
   1975 
   1976 	tmpl.fetch_args = atomic_fetch_args;
   1977 	tmpl.emit = atomic_emit;
   1978 	bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl;
   1979 	bld_base->op_actions[TGSI_OPCODE_ATOMUADD].intr_name = "add";
   1980 	bld_base->op_actions[TGSI_OPCODE_ATOMXCHG] = tmpl;
   1981 	bld_base->op_actions[TGSI_OPCODE_ATOMXCHG].intr_name = "swap";
   1982 	bld_base->op_actions[TGSI_OPCODE_ATOMCAS] = tmpl;
   1983 	bld_base->op_actions[TGSI_OPCODE_ATOMCAS].intr_name = "cmpswap";
   1984 	bld_base->op_actions[TGSI_OPCODE_ATOMAND] = tmpl;
   1985 	bld_base->op_actions[TGSI_OPCODE_ATOMAND].intr_name = "and";
   1986 	bld_base->op_actions[TGSI_OPCODE_ATOMOR] = tmpl;
   1987 	bld_base->op_actions[TGSI_OPCODE_ATOMOR].intr_name = "or";
   1988 	bld_base->op_actions[TGSI_OPCODE_ATOMXOR] = tmpl;
   1989 	bld_base->op_actions[TGSI_OPCODE_ATOMXOR].intr_name = "xor";
   1990 	bld_base->op_actions[TGSI_OPCODE_ATOMUMIN] = tmpl;
   1991 	bld_base->op_actions[TGSI_OPCODE_ATOMUMIN].intr_name = "umin";
   1992 	bld_base->op_actions[TGSI_OPCODE_ATOMUMAX] = tmpl;
   1993 	bld_base->op_actions[TGSI_OPCODE_ATOMUMAX].intr_name = "umax";
   1994 	bld_base->op_actions[TGSI_OPCODE_ATOMIMIN] = tmpl;
   1995 	bld_base->op_actions[TGSI_OPCODE_ATOMIMIN].intr_name = "smin";
   1996 	bld_base->op_actions[TGSI_OPCODE_ATOMIMAX] = tmpl;
   1997 	bld_base->op_actions[TGSI_OPCODE_ATOMIMAX].intr_name = "smax";
   1998 }
   1999